fairseq/models/roberta/model.py
Killed 9 out of 87 mutants
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 2905
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -29,7 +29,7 @@
from .hub_interface import RobertaHubInterface
-logger = logging.getLogger(__name__)
+logger = None
@register_model('roberta')
Mutant 2908
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -35,7 +35,6 @@
@register_model('roberta')
class RobertaModel(FairseqEncoderModel):
- @classmethod
def hub_models(cls):
return {
'roberta.base': 'http://dl.fbaipublicfiles.com/fairseq/models/roberta.base.tar.gz',
Mutant 2909
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -53,7 +53,6 @@
self.classification_heads = nn.ModuleDict()
- @staticmethod
def add_args(parser):
"""Add model-specific arguments to the parser."""
parser.add_argument('--encoder-layers', type=int, metavar='L',
Mutant 2910
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -56,7 +56,7 @@
@staticmethod
def add_args(parser):
"""Add model-specific arguments to the parser."""
- parser.add_argument('--encoder-layers', type=int, metavar='L',
+ parser.add_argument('XX--encoder-layersXX', type=int, metavar='L',
help='num encoder layers')
parser.add_argument('--encoder-embed-dim', type=int, metavar='H',
help='encoder embedding dimension')
Mutant 2911
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -56,7 +56,7 @@
@staticmethod
def add_args(parser):
"""Add model-specific arguments to the parser."""
- parser.add_argument('--encoder-layers', type=int, metavar='L',
+ parser.add_argument('--encoder-layers', type=int, metavar='XXLXX',
help='num encoder layers')
parser.add_argument('--encoder-embed-dim', type=int, metavar='H',
help='encoder embedding dimension')
Mutant 2912
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -57,7 +57,7 @@
def add_args(parser):
"""Add model-specific arguments to the parser."""
parser.add_argument('--encoder-layers', type=int, metavar='L',
- help='num encoder layers')
+ help='XXnum encoder layersXX')
parser.add_argument('--encoder-embed-dim', type=int, metavar='H',
help='encoder embedding dimension')
parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
Mutant 2913
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -58,7 +58,7 @@
"""Add model-specific arguments to the parser."""
parser.add_argument('--encoder-layers', type=int, metavar='L',
help='num encoder layers')
- parser.add_argument('--encoder-embed-dim', type=int, metavar='H',
+ parser.add_argument('XX--encoder-embed-dimXX', type=int, metavar='H',
help='encoder embedding dimension')
parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
help='encoder embedding dimension for FFN')
Mutant 2914
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -58,7 +58,7 @@
"""Add model-specific arguments to the parser."""
parser.add_argument('--encoder-layers', type=int, metavar='L',
help='num encoder layers')
- parser.add_argument('--encoder-embed-dim', type=int, metavar='H',
+ parser.add_argument('--encoder-embed-dim', type=int, metavar='XXHXX',
help='encoder embedding dimension')
parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
help='encoder embedding dimension for FFN')
Mutant 2915
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -59,7 +59,7 @@
parser.add_argument('--encoder-layers', type=int, metavar='L',
help='num encoder layers')
parser.add_argument('--encoder-embed-dim', type=int, metavar='H',
- help='encoder embedding dimension')
+ help='XXencoder embedding dimensionXX')
parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
help='encoder embedding dimension for FFN')
parser.add_argument('--encoder-attention-heads', type=int, metavar='A',
Mutant 2916
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -60,7 +60,7 @@
help='num encoder layers')
parser.add_argument('--encoder-embed-dim', type=int, metavar='H',
help='encoder embedding dimension')
- parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
+ parser.add_argument('XX--encoder-ffn-embed-dimXX', type=int, metavar='F',
help='encoder embedding dimension for FFN')
parser.add_argument('--encoder-attention-heads', type=int, metavar='A',
help='num encoder attention heads')
Mutant 2917
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -60,7 +60,7 @@
help='num encoder layers')
parser.add_argument('--encoder-embed-dim', type=int, metavar='H',
help='encoder embedding dimension')
- parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
+ parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='XXFXX',
help='encoder embedding dimension for FFN')
parser.add_argument('--encoder-attention-heads', type=int, metavar='A',
help='num encoder attention heads')
Mutant 2918
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -61,7 +61,7 @@
parser.add_argument('--encoder-embed-dim', type=int, metavar='H',
help='encoder embedding dimension')
parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
- help='encoder embedding dimension for FFN')
+ help='XXencoder embedding dimension for FFNXX')
parser.add_argument('--encoder-attention-heads', type=int, metavar='A',
help='num encoder attention heads')
parser.add_argument('--activation-fn',
Mutant 2919
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -62,7 +62,7 @@
help='encoder embedding dimension')
parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
help='encoder embedding dimension for FFN')
- parser.add_argument('--encoder-attention-heads', type=int, metavar='A',
+ parser.add_argument('XX--encoder-attention-headsXX', type=int, metavar='A',
help='num encoder attention heads')
parser.add_argument('--activation-fn',
choices=utils.get_available_activation_fns(),
Mutant 2920
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -62,7 +62,7 @@
help='encoder embedding dimension')
parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
help='encoder embedding dimension for FFN')
- parser.add_argument('--encoder-attention-heads', type=int, metavar='A',
+ parser.add_argument('--encoder-attention-heads', type=int, metavar='XXAXX',
help='num encoder attention heads')
parser.add_argument('--activation-fn',
choices=utils.get_available_activation_fns(),
Mutant 2921
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -63,7 +63,7 @@
parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
help='encoder embedding dimension for FFN')
parser.add_argument('--encoder-attention-heads', type=int, metavar='A',
- help='num encoder attention heads')
+ help='XXnum encoder attention headsXX')
parser.add_argument('--activation-fn',
choices=utils.get_available_activation_fns(),
help='activation function to use')
Mutant 2922
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -64,7 +64,7 @@
help='encoder embedding dimension for FFN')
parser.add_argument('--encoder-attention-heads', type=int, metavar='A',
help='num encoder attention heads')
- parser.add_argument('--activation-fn',
+ parser.add_argument('XX--activation-fnXX',
choices=utils.get_available_activation_fns(),
help='activation function to use')
parser.add_argument('--pooler-activation-fn',
Mutant 2923
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -66,7 +66,7 @@
help='num encoder attention heads')
parser.add_argument('--activation-fn',
choices=utils.get_available_activation_fns(),
- help='activation function to use')
+ help='XXactivation function to useXX')
parser.add_argument('--pooler-activation-fn',
choices=utils.get_available_activation_fns(),
help='activation function to use for pooler layer')
Mutant 2924
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -67,7 +67,7 @@
parser.add_argument('--activation-fn',
choices=utils.get_available_activation_fns(),
help='activation function to use')
- parser.add_argument('--pooler-activation-fn',
+ parser.add_argument('XX--pooler-activation-fnXX',
choices=utils.get_available_activation_fns(),
help='activation function to use for pooler layer')
parser.add_argument('--encoder-normalize-before', action='store_true',
Mutant 2925
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -69,7 +69,7 @@
help='activation function to use')
parser.add_argument('--pooler-activation-fn',
choices=utils.get_available_activation_fns(),
- help='activation function to use for pooler layer')
+ help='XXactivation function to use for pooler layerXX')
parser.add_argument('--encoder-normalize-before', action='store_true',
help='apply layernorm before each encoder block')
parser.add_argument('--dropout', type=float, metavar='D',
Mutant 2926
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -70,7 +70,7 @@
parser.add_argument('--pooler-activation-fn',
choices=utils.get_available_activation_fns(),
help='activation function to use for pooler layer')
- parser.add_argument('--encoder-normalize-before', action='store_true',
+ parser.add_argument('XX--encoder-normalize-beforeXX', action='store_true',
help='apply layernorm before each encoder block')
parser.add_argument('--dropout', type=float, metavar='D',
help='dropout probability')
Mutant 2928
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -71,7 +71,7 @@
choices=utils.get_available_activation_fns(),
help='activation function to use for pooler layer')
parser.add_argument('--encoder-normalize-before', action='store_true',
- help='apply layernorm before each encoder block')
+ help='XXapply layernorm before each encoder blockXX')
parser.add_argument('--dropout', type=float, metavar='D',
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
Mutant 2929
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -72,7 +72,7 @@
help='activation function to use for pooler layer')
parser.add_argument('--encoder-normalize-before', action='store_true',
help='apply layernorm before each encoder block')
- parser.add_argument('--dropout', type=float, metavar='D',
+ parser.add_argument('XX--dropoutXX', type=float, metavar='D',
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
Mutant 2930
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -72,7 +72,7 @@
help='activation function to use for pooler layer')
parser.add_argument('--encoder-normalize-before', action='store_true',
help='apply layernorm before each encoder block')
- parser.add_argument('--dropout', type=float, metavar='D',
+ parser.add_argument('--dropout', type=float, metavar='XXDXX',
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
Mutant 2931
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -73,7 +73,7 @@
parser.add_argument('--encoder-normalize-before', action='store_true',
help='apply layernorm before each encoder block')
parser.add_argument('--dropout', type=float, metavar='D',
- help='dropout probability')
+ help='XXdropout probabilityXX')
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', type=float, metavar='D',
Mutant 2932
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -74,7 +74,7 @@
help='apply layernorm before each encoder block')
parser.add_argument('--dropout', type=float, metavar='D',
help='dropout probability')
- parser.add_argument('--attention-dropout', type=float, metavar='D',
+ parser.add_argument('XX--attention-dropoutXX', type=float, metavar='D',
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN')
Mutant 2933
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -74,7 +74,7 @@
help='apply layernorm before each encoder block')
parser.add_argument('--dropout', type=float, metavar='D',
help='dropout probability')
- parser.add_argument('--attention-dropout', type=float, metavar='D',
+ parser.add_argument('--attention-dropout', type=float, metavar='XXDXX',
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN')
Mutant 2934
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -75,7 +75,7 @@
parser.add_argument('--dropout', type=float, metavar='D',
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
- help='dropout probability for attention weights')
+ help='XXdropout probability for attention weightsXX')
parser.add_argument('--activation-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN')
parser.add_argument('--pooler-dropout', type=float, metavar='D',
Mutant 2935
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -76,7 +76,7 @@
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
- parser.add_argument('--activation-dropout', type=float, metavar='D',
+ parser.add_argument('XX--activation-dropoutXX', type=float, metavar='D',
help='dropout probability after activation in FFN')
parser.add_argument('--pooler-dropout', type=float, metavar='D',
help='dropout probability in the masked_lm pooler layers')
Mutant 2936
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -76,7 +76,7 @@
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
- parser.add_argument('--activation-dropout', type=float, metavar='D',
+ parser.add_argument('--activation-dropout', type=float, metavar='XXDXX',
help='dropout probability after activation in FFN')
parser.add_argument('--pooler-dropout', type=float, metavar='D',
help='dropout probability in the masked_lm pooler layers')
Mutant 2937
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -77,7 +77,7 @@
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', type=float, metavar='D',
- help='dropout probability after activation in FFN')
+ help='XXdropout probability after activation in FFNXX')
parser.add_argument('--pooler-dropout', type=float, metavar='D',
help='dropout probability in the masked_lm pooler layers')
parser.add_argument('--max-positions', type=int,
Mutant 2938
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -78,7 +78,7 @@
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN')
- parser.add_argument('--pooler-dropout', type=float, metavar='D',
+ parser.add_argument('XX--pooler-dropoutXX', type=float, metavar='D',
help='dropout probability in the masked_lm pooler layers')
parser.add_argument('--max-positions', type=int,
help='number of positional embeddings to learn')
Mutant 2939
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -78,7 +78,7 @@
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN')
- parser.add_argument('--pooler-dropout', type=float, metavar='D',
+ parser.add_argument('--pooler-dropout', type=float, metavar='XXDXX',
help='dropout probability in the masked_lm pooler layers')
parser.add_argument('--max-positions', type=int,
help='number of positional embeddings to learn')
Mutant 2940
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -79,7 +79,7 @@
parser.add_argument('--activation-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN')
parser.add_argument('--pooler-dropout', type=float, metavar='D',
- help='dropout probability in the masked_lm pooler layers')
+ help='XXdropout probability in the masked_lm pooler layersXX')
parser.add_argument('--max-positions', type=int,
help='number of positional embeddings to learn')
parser.add_argument('--load-checkpoint-heads', action='store_true',
Mutant 2941
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -80,7 +80,7 @@
help='dropout probability after activation in FFN')
parser.add_argument('--pooler-dropout', type=float, metavar='D',
help='dropout probability in the masked_lm pooler layers')
- parser.add_argument('--max-positions', type=int,
+ parser.add_argument('XX--max-positionsXX', type=int,
help='number of positional embeddings to learn')
parser.add_argument('--load-checkpoint-heads', action='store_true',
help='(re-)register and load heads when loading checkpoints')
Mutant 2942
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -81,7 +81,7 @@
parser.add_argument('--pooler-dropout', type=float, metavar='D',
help='dropout probability in the masked_lm pooler layers')
parser.add_argument('--max-positions', type=int,
- help='number of positional embeddings to learn')
+ help='XXnumber of positional embeddings to learnXX')
parser.add_argument('--load-checkpoint-heads', action='store_true',
help='(re-)register and load heads when loading checkpoints')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
Mutant 2943
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -82,7 +82,7 @@
help='dropout probability in the masked_lm pooler layers')
parser.add_argument('--max-positions', type=int,
help='number of positional embeddings to learn')
- parser.add_argument('--load-checkpoint-heads', action='store_true',
+ parser.add_argument('XX--load-checkpoint-headsXX', action='store_true',
help='(re-)register and load heads when loading checkpoints')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
parser.add_argument('--encoder-layerdrop', type=float, metavar='D', default=0,
Mutant 2945
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -83,7 +83,7 @@
parser.add_argument('--max-positions', type=int,
help='number of positional embeddings to learn')
parser.add_argument('--load-checkpoint-heads', action='store_true',
- help='(re-)register and load heads when loading checkpoints')
+ help='XX(re-)register and load heads when loading checkpointsXX')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
parser.add_argument('--encoder-layerdrop', type=float, metavar='D', default=0,
help='LayerDrop probability for encoder')
Mutant 2946
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -85,7 +85,7 @@
parser.add_argument('--load-checkpoint-heads', action='store_true',
help='(re-)register and load heads when loading checkpoints')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
- parser.add_argument('--encoder-layerdrop', type=float, metavar='D', default=0,
+ parser.add_argument('XX--encoder-layerdropXX', type=float, metavar='D', default=0,
help='LayerDrop probability for encoder')
parser.add_argument('--encoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
Mutant 2947
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -85,7 +85,7 @@
parser.add_argument('--load-checkpoint-heads', action='store_true',
help='(re-)register and load heads when loading checkpoints')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
- parser.add_argument('--encoder-layerdrop', type=float, metavar='D', default=0,
+ parser.add_argument('--encoder-layerdrop', type=float, metavar='XXDXX', default=0,
help='LayerDrop probability for encoder')
parser.add_argument('--encoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
Mutant 2948
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -85,7 +85,7 @@
parser.add_argument('--load-checkpoint-heads', action='store_true',
help='(re-)register and load heads when loading checkpoints')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
- parser.add_argument('--encoder-layerdrop', type=float, metavar='D', default=0,
+ parser.add_argument('--encoder-layerdrop', type=float, metavar='D', default=1,
help='LayerDrop probability for encoder')
parser.add_argument('--encoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
Mutant 2949
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -86,7 +86,7 @@
help='(re-)register and load heads when loading checkpoints')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
parser.add_argument('--encoder-layerdrop', type=float, metavar='D', default=0,
- help='LayerDrop probability for encoder')
+ help='XXLayerDrop probability for encoderXX')
parser.add_argument('--encoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
Mutant 2950
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -87,7 +87,7 @@
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
parser.add_argument('--encoder-layerdrop', type=float, metavar='D', default=0,
help='LayerDrop probability for encoder')
- parser.add_argument('--encoder-layers-to-keep', default=None,
+ parser.add_argument('XX--encoder-layers-to-keepXX', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
Mutant 2951
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -88,7 +88,7 @@
parser.add_argument('--encoder-layerdrop', type=float, metavar='D', default=0,
help='LayerDrop probability for encoder')
parser.add_argument('--encoder-layers-to-keep', default=None,
- help='which layers to *keep* when pruning as a comma-separated list')
+ help='XXwhich layers to *keep* when pruning as a comma-separated listXX')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
Mutant 2952
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -90,7 +90,7 @@
parser.add_argument('--encoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
- parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
+ parser.add_argument('XX--quant-noise-pqXX', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
Mutant 2953
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -90,7 +90,7 @@
parser.add_argument('--encoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
- parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
+ parser.add_argument('--quant-noise-pq', type=float, metavar='XXDXX', default=0,
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
Mutant 2954
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -90,7 +90,7 @@
parser.add_argument('--encoder-layers-to-keep', default=None,
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
- parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
+ parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=1,
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
Mutant 2955
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -91,7 +91,7 @@
help='which layers to *keep* when pruning as a comma-separated list')
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
- help='iterative PQ quantization noise at training time')
+ help='XXiterative PQ quantization noise at training timeXX')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
Mutant 2956
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -92,7 +92,7 @@
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
- parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
+ parser.add_argument('XX--quant-noise-pq-block-sizeXX', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
Mutant 2957
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -92,7 +92,7 @@
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
- parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
+ parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='XXDXX', default=8,
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
Mutant 2958
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -92,7 +92,7 @@
# args for Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
- parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
+ parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=9,
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
Mutant 2959
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -93,7 +93,7 @@
parser.add_argument('--quant-noise-pq', type=float, metavar='D', default=0,
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
- help='block size of quantization noise at training time')
+ help='XXblock size of quantization noise at training timeXX')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
parser.add_argument('--untie-weights-roberta', action='store_true',
Mutant 2960
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -94,7 +94,7 @@
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
- parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
+ parser.add_argument('XX--quant-noise-scalarXX', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
parser.add_argument('--untie-weights-roberta', action='store_true',
help='Untie weights between embeddings and classifiers in RoBERTa')
Mutant 2961
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -94,7 +94,7 @@
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
- parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
+ parser.add_argument('--quant-noise-scalar', type=float, metavar='XXDXX', default=0,
help='scalar quantization noise and scalar quantization at training time')
parser.add_argument('--untie-weights-roberta', action='store_true',
help='Untie weights between embeddings and classifiers in RoBERTa')
Mutant 2962
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -94,7 +94,7 @@
help='iterative PQ quantization noise at training time')
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
- parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
+ parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=1,
help='scalar quantization noise and scalar quantization at training time')
parser.add_argument('--untie-weights-roberta', action='store_true',
help='Untie weights between embeddings and classifiers in RoBERTa')
Mutant 2963
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -95,7 +95,7 @@
parser.add_argument('--quant-noise-pq-block-size', type=int, metavar='D', default=8,
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
- help='scalar quantization noise and scalar quantization at training time')
+ help='XXscalar quantization noise and scalar quantization at training timeXX')
parser.add_argument('--untie-weights-roberta', action='store_true',
help='Untie weights between embeddings and classifiers in RoBERTa')
Mutant 2964
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -96,7 +96,7 @@
help='block size of quantization noise at training time')
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
- parser.add_argument('--untie-weights-roberta', action='store_true',
+ parser.add_argument('XX--untie-weights-robertaXX', action='store_true',
help='Untie weights between embeddings and classifiers in RoBERTa')
@classmethod
Mutant 2966
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -97,7 +97,7 @@
parser.add_argument('--quant-noise-scalar', type=float, metavar='D', default=0,
help='scalar quantization noise and scalar quantization at training time')
parser.add_argument('--untie-weights-roberta', action='store_true',
- help='Untie weights between embeddings and classifiers in RoBERTa')
+ help='XXUntie weights between embeddings and classifiers in RoBERTaXX')
@classmethod
def build_model(cls, args, task):
Mutant 2967
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -99,7 +99,6 @@
parser.add_argument('--untie-weights-roberta', action='store_true',
help='Untie weights between embeddings and classifiers in RoBERTa')
- @classmethod
def build_model(cls, args, task):
"""Build a new model instance."""
Mutant 2968
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -112,7 +112,7 @@
encoder = RobertaEncoder(args, task.source_dictionary)
return cls(args, encoder)
- def forward(self, src_tokens, features_only=False, return_all_hiddens=False, classification_head_name=None, **kwargs):
+ def forward(self, src_tokens, features_only=True, return_all_hiddens=False, classification_head_name=None, **kwargs):
if classification_head_name is not None:
features_only = True
Mutant 2969
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -112,7 +112,7 @@
encoder = RobertaEncoder(args, task.source_dictionary)
return cls(args, encoder)
- def forward(self, src_tokens, features_only=False, return_all_hiddens=False, classification_head_name=None, **kwargs):
+ def forward(self, src_tokens, features_only=False, return_all_hiddens=True, classification_head_name=None, **kwargs):
if classification_head_name is not None:
features_only = True
Mutant 2970
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -152,7 +152,6 @@
self.args.quant_noise_pq_block_size,
)
- @property
def supported_targets(self):
return {'self'}
Mutant 2971
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -156,7 +156,6 @@
def supported_targets(self):
return {'self'}
- @classmethod
def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='gpt2', **kwargs):
from fairseq import hub_utils
x = hub_utils.from_pretrained(
Mutant 2972
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -157,7 +157,7 @@
return {'self'}
@classmethod
- def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='gpt2', **kwargs):
+ def from_pretrained(cls, model_name_or_path, checkpoint_file='XXmodel.ptXX', data_name_or_path='.', bpe='gpt2', **kwargs):
from fairseq import hub_utils
x = hub_utils.from_pretrained(
model_name_or_path,
Mutant 2973
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -157,7 +157,7 @@
return {'self'}
@classmethod
- def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='gpt2', **kwargs):
+ def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='XX.XX', bpe='gpt2', **kwargs):
from fairseq import hub_utils
x = hub_utils.from_pretrained(
model_name_or_path,
Mutant 2974
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -157,7 +157,7 @@
return {'self'}
@classmethod
- def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='gpt2', **kwargs):
+ def from_pretrained(cls, model_name_or_path, checkpoint_file='model.pt', data_name_or_path='.', bpe='XXgpt2XX', **kwargs):
from fairseq import hub_utils
x = hub_utils.from_pretrained(
model_name_or_path,
Mutant 2975
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -260,7 +260,7 @@
class RobertaClassificationHead(nn.Module):
"""Head for sentence-level classification tasks."""
- def __init__(self, input_dim, inner_dim, num_classes, activation_fn, pooler_dropout, q_noise=0, qn_block_size=8):
+ def __init__(self, input_dim, inner_dim, num_classes, activation_fn, pooler_dropout, q_noise=1, qn_block_size=8):
super().__init__()
self.dense = nn.Linear(input_dim, inner_dim)
self.activation_fn = utils.get_activation_fn(activation_fn)
Mutant 2976
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -260,7 +260,7 @@
class RobertaClassificationHead(nn.Module):
"""Head for sentence-level classification tasks."""
- def __init__(self, input_dim, inner_dim, num_classes, activation_fn, pooler_dropout, q_noise=0, qn_block_size=8):
+ def __init__(self, input_dim, inner_dim, num_classes, activation_fn, pooler_dropout, q_noise=0, qn_block_size=9):
super().__init__()
self.dense = nn.Linear(input_dim, inner_dim)
self.activation_fn = utils.get_activation_fn(activation_fn)
Mutant 2977
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -317,7 +317,7 @@
weight=self.sentence_encoder.embed_tokens.weight if not args.untie_weights_roberta else None,
)
- def forward(self, src_tokens, features_only=False, return_all_hiddens=False, masked_tokens=None, **unused):
+ def forward(self, src_tokens, features_only=True, return_all_hiddens=False, masked_tokens=None, **unused):
"""
Args:
src_tokens (LongTensor): input tokens of shape `(batch, src_len)`
Mutant 2978
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -317,7 +317,7 @@
weight=self.sentence_encoder.embed_tokens.weight if not args.untie_weights_roberta else None,
)
- def forward(self, src_tokens, features_only=False, return_all_hiddens=False, masked_tokens=None, **unused):
+ def forward(self, src_tokens, features_only=False, return_all_hiddens=True, masked_tokens=None, **unused):
"""
Args:
src_tokens (LongTensor): input tokens of shape `(batch, src_len)`
Mutant 2979
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -339,7 +339,7 @@
x = self.output_layer(x, masked_tokens=masked_tokens)
return x, extra
- def extract_features(self, src_tokens, return_all_hiddens=False, **unused):
+ def extract_features(self, src_tokens, return_all_hiddens=True, **unused):
inner_states, _ = self.sentence_encoder(
src_tokens,
last_state_only=not return_all_hiddens,
Mutant 2981
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -355,7 +355,7 @@
return self.args.max_positions
-@register_model_architecture('roberta', 'roberta')
+@register_model_architecture('roberta', 'XXrobertaXX')
def base_architecture(args):
args.encoder_layers = getattr(args, 'encoder_layers', 12)
args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 768)
Mutant 2982
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -354,8 +354,6 @@
"""Maximum output length supported by the encoder."""
return self.args.max_positions
-
-@register_model_architecture('roberta', 'roberta')
def base_architecture(args):
args.encoder_layers = getattr(args, 'encoder_layers', 12)
args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 768)
Mutant 2984
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -373,7 +373,7 @@
args.encoder_layerdrop = getattr(args, 'encoder_layerdrop', 0.0)
-@register_model_architecture('roberta', 'roberta_base')
+@register_model_architecture('roberta', 'XXroberta_baseXX')
def roberta_base_architecture(args):
base_architecture(args)
Mutant 2985
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -372,8 +372,6 @@
args.encoder_layers_to_keep = getattr(args, 'encoder_layers_to_keep', None)
args.encoder_layerdrop = getattr(args, 'encoder_layerdrop', 0.0)
-
-@register_model_architecture('roberta', 'roberta_base')
def roberta_base_architecture(args):
base_architecture(args)
Mutant 2987
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -378,7 +378,7 @@
base_architecture(args)
-@register_model_architecture('roberta', 'roberta_large')
+@register_model_architecture('roberta', 'XXroberta_largeXX')
def roberta_large_architecture(args):
args.encoder_layers = getattr(args, 'encoder_layers', 24)
args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1024)
Mutant 2988
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -377,8 +377,6 @@
def roberta_base_architecture(args):
base_architecture(args)
-
-@register_model_architecture('roberta', 'roberta_large')
def roberta_large_architecture(args):
args.encoder_layers = getattr(args, 'encoder_layers', 24)
args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1024)
Mutant 2990
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -387,7 +387,7 @@
base_architecture(args)
-@register_model_architecture('roberta', 'xlm')
+@register_model_architecture('roberta', 'XXxlmXX')
def xlm_architecture(args):
args.encoder_layers = getattr(args, 'encoder_layers', 16)
args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1280)
Mutant 2991
--- fairseq/models/roberta/model.py
+++ fairseq/models/roberta/model.py
@@ -386,8 +386,6 @@
args.encoder_attention_heads = getattr(args, 'encoder_attention_heads', 16)
base_architecture(args)
-
-@register_model_architecture('roberta', 'xlm')
def xlm_architecture(args):
args.encoder_layers = getattr(args, 'encoder_layers', 16)
args.encoder_embed_dim = getattr(args, 'encoder_embed_dim', 1280)