fairseq/modules/downsampled_multihead_attention.py

Killed 0 out of 10 mutants

Survived

The mutants listed below survived mutation testing: no test failed when the change was applied. Each one points to a gap in your test suite.

Mutant 694

--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -20,7 +20,7 @@
     def __init__(
         self, out_channels, embed_dim, head_dim, head_index, dropout=0.,
         bias=True, project_input=True, gated=False, downsample=False,
-        num_heads=1,
+        num_heads=2,
     ):
         super().__init__()
         self.embed_dim = embed_dim

Mutant 695

--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -62,7 +62,7 @@
 
     def forward(
         self, query, key, value, mask_future_timesteps=False,
-        key_padding_mask=None, use_scalar_bias=False,
+        key_padding_mask=None, use_scalar_bias=True,
     ):
         """Input shape: Time x Batch x Channel
         Self-attention can be implemented by passing in the same arguments for

Mutant 696

--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -154,7 +154,7 @@
     """
     def __init__(
         self, out_channels, embed_dim, num_heads, dropout=0., bias=True,
-        project_input=True, gated=False, downsample=False,
+        project_input=False, gated=False, downsample=False,
     ):
         self.embed_dim = embed_dim
         self.num_heads = num_heads

Mutant 697

--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -154,7 +154,7 @@
     """
     def __init__(
         self, out_channels, embed_dim, num_heads, dropout=0., bias=True,
-        project_input=True, gated=False, downsample=False,
+        project_input=True, gated=True, downsample=False,
     ):
         self.embed_dim = embed_dim
         self.num_heads = num_heads

Mutant 698

--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -154,7 +154,7 @@
     """
     def __init__(
         self, out_channels, embed_dim, num_heads, dropout=0., bias=True,
-        project_input=True, gated=False, downsample=False,
+        project_input=True, gated=False, downsample=True,
     ):
         self.embed_dim = embed_dim
         self.num_heads = num_heads

Mutant 699

--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -187,7 +187,7 @@
 
     def forward(
         self, query, key, value, mask_future_timesteps=False,
-        key_padding_mask=None, use_scalar_bias=False,
+        key_padding_mask=None, use_scalar_bias=True,
     ):
         src_len, bsz, embed_dim = key.size()
         tgt_len = query.size(0)

Mutant 700

--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -237,7 +237,7 @@
         return x[::self.index+1]
 
 
-def Linear(in_features, out_features, dropout=0., bias=True):
+def Linear(in_features, out_features, dropout=1.0, bias=True):
     """Weight-normalized Linear layer (input: B x T x C)"""
     m = nn.Linear(in_features, out_features, bias=bias)
     m.weight.data.normal_(mean=0, std=math.sqrt((1 - dropout) / in_features))

Mutant 701

--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -237,7 +237,7 @@
         return x[::self.index+1]
 
 
-def Linear(in_features, out_features, dropout=0., bias=True):
+def Linear(in_features, out_features, dropout=0., bias=False):
     """Weight-normalized Linear layer (input: B x T x C)"""
     m = nn.Linear(in_features, out_features, bias=bias)
     m.weight.data.normal_(mean=0, std=math.sqrt((1 - dropout) / in_features))

Mutant 702

--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -245,7 +245,7 @@
     return nn.utils.weight_norm(m)
 
 
-def GatedLinear(in_features, out_features, dropout=0., bias=True):
+def GatedLinear(in_features, out_features, dropout=1.0, bias=True):
     """Weight-normalized Linear layer (input: B x T x C) with interspersed GLU units"""
     return nn.Sequential(
         Linear(in_features, out_features*4, dropout, bias),

Mutant 703

--- fairseq/modules/downsampled_multihead_attention.py
+++ fairseq/modules/downsampled_multihead_attention.py
@@ -245,7 +245,7 @@
     return nn.utils.weight_norm(m)
 
 
-def GatedLinear(in_features, out_features, dropout=0., bias=True):
+def GatedLinear(in_features, out_features, dropout=0., bias=False):
     """Weight-normalized Linear layer (input: B x T x C) with interspersed GLU units"""
     return nn.Sequential(
         Linear(in_features, out_features*4, dropout, bias),