fairseq/optim/bmuf.py

Killed 0 out of 7 mutants

Survived

The mutants below survived mutation testing: the test suite still passed with each change applied. These mutants show holes in your test suite.

Mutant 851

--- fairseq/optim/bmuf.py
+++ fairseq/optim/bmuf.py
@@ -34,7 +34,6 @@
         self.average_sync = self.args.average_sync
         self.world_size = self.args.distributed_world_size
 
-    @staticmethod
     def add_args(parser):
         """Add optimizer-specific arguments to the parser."""
         parser.add_argument(

Mutant 852

--- fairseq/optim/bmuf.py
+++ fairseq/optim/bmuf.py
@@ -71,7 +71,6 @@
             help="Specify whether you want to average the local momentum after each sync",
         )
 
-    @property
     def optimizer(self):
         return self._optimizer.optimizer
 

Mutant 853

--- fairseq/optim/bmuf.py
+++ fairseq/optim/bmuf.py
@@ -75,7 +75,6 @@
     def optimizer(self):
         return self._optimizer.optimizer
 
-    @property
     def optimizer_config(self):
         return self._optimizer.optimizer_config
 

Mutant 854

--- fairseq/optim/bmuf.py
+++ fairseq/optim/bmuf.py
@@ -137,7 +137,7 @@
             return True
         return False
 
-    def _warmup_sync(self, root_rank=0):
+    def _warmup_sync(self, root_rank=1):
         if self.world_size <= 1:
             return
         # Broadcast the local model to all gpus

Mutant 855

--- fairseq/optim/bmuf.py
+++ fairseq/optim/bmuf.py
@@ -173,7 +173,6 @@
         """Set the number of parameters updates."""
         self._num_updates = num_updates
 
-    @torch.no_grad()
     def _reset_local_data(self):
         # (Step-0) Initialize global momentum parameters and store global copy on each gpu
         self.global_params = [torch.zeros_like(p.data) for p in self.params]

Mutant 856

--- fairseq/optim/bmuf.py
+++ fairseq/optim/bmuf.py
@@ -184,7 +184,6 @@
         for param, global_param in zip(self.params, self.global_params):
             global_param.copy_(param.data)
 
-    @torch.no_grad()
     def _calc_grad(self):
         # global_params is basically the global copy from the previously finished
         # synchronisation. param.data is local parameter after block_sync_freq

Mutant 857

--- fairseq/optim/bmuf.py
+++ fairseq/optim/bmuf.py
@@ -201,7 +201,6 @@
             sync_para /= float(dist.get_world_size())
             dist.all_reduce(sync_para, op=dist.ReduceOp.SUM)
 
-    @torch.no_grad()
     def _update_global_model(self):
         for index, (param, global_param, smoothed_grad, grad) in enumerate(
             zip(