fairseq/modules/downsampled_multihead_attention.py
Killed 0 out of 10 mutants
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 1965
--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -20,7 +20,7 @@
def __init__(
self, out_channels, embed_dim, head_dim, head_index, dropout=0.,
bias=True, project_input=True, gated=False, downsample=False,
- num_heads=1,
+ num_heads=2,
):
super().__init__()
self.embed_dim = embed_dim
Mutant 1966
--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -62,7 +62,7 @@
def forward(
self, query, key, value, mask_future_timesteps=False,
- key_padding_mask=None, use_scalar_bias=False,
+ key_padding_mask=None, use_scalar_bias=True,
):
"""Input shape: Time x Batch x Channel
Self-attention can be implemented by passing in the same arguments for
Mutant 1967
--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -154,7 +154,7 @@
"""
def __init__(
self, out_channels, embed_dim, num_heads, dropout=0., bias=True,
- project_input=True, gated=False, downsample=False,
+ project_input=False, gated=False, downsample=False,
):
self.embed_dim = embed_dim
self.num_heads = num_heads
Mutant 1968
--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -154,7 +154,7 @@
"""
def __init__(
self, out_channels, embed_dim, num_heads, dropout=0., bias=True,
- project_input=True, gated=False, downsample=False,
+ project_input=True, gated=True, downsample=False,
):
self.embed_dim = embed_dim
self.num_heads = num_heads
Mutant 1969
--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -154,7 +154,7 @@
"""
def __init__(
self, out_channels, embed_dim, num_heads, dropout=0., bias=True,
- project_input=True, gated=False, downsample=False,
+ project_input=True, gated=False, downsample=True,
):
self.embed_dim = embed_dim
self.num_heads = num_heads
Mutant 1970
--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -187,7 +187,7 @@
def forward(
self, query, key, value, mask_future_timesteps=False,
- key_padding_mask=None, use_scalar_bias=False,
+ key_padding_mask=None, use_scalar_bias=True,
):
src_len, bsz, embed_dim = key.size()
tgt_len = query.size(0)
Mutant 1971
--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -237,7 +237,7 @@
return x[::self.index+1]
-def Linear(in_features, out_features, dropout=0., bias=True):
+def Linear(in_features, out_features, dropout=1.0, bias=True):
"""Weight-normalized Linear layer (input: B x T x C)"""
m = nn.Linear(in_features, out_features, bias=bias)
m.weight.data.normal_(mean=0, std=math.sqrt((1 - dropout) / in_features))
Mutant 1972
--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -237,7 +237,7 @@
return x[::self.index+1]
-def Linear(in_features, out_features, dropout=0., bias=True):
+def Linear(in_features, out_features, dropout=0., bias=False):
"""Weight-normalized Linear layer (input: B x T x C)"""
m = nn.Linear(in_features, out_features, bias=bias)
m.weight.data.normal_(mean=0, std=math.sqrt((1 - dropout) / in_features))
Mutant 1973
--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -245,7 +245,7 @@
return nn.utils.weight_norm(m)
-def GatedLinear(in_features, out_features, dropout=0., bias=True):
+def GatedLinear(in_features, out_features, dropout=1.0, bias=True):
"""Weight-normalized Linear layer (input: B x T x C) with interspersed GLU units"""
return nn.Sequential(
Linear(in_features, out_features*4, dropout, bias),
Mutant 1974
--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -245,7 +245,7 @@
return nn.utils.weight_norm(m)
-def GatedLinear(in_features, out_features, dropout=0., bias=True):
+def GatedLinear(in_features, out_features, dropout=0., bias=False):
"""Weight-normalized Linear layer (input: B x T x C) with interspersed GLU units"""
return nn.Sequential(
Linear(in_features, out_features*4, dropout, bias),