ml-agents/mlagents/trainers/ppo/optimizer.py

Killed 95 out of 138 mutants

Survived

The mutants listed below survived mutation testing: the test suite still passed with each change applied, so each one marks a behavior your tests do not currently check.

Mutant 587

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -21,7 +21,7 @@
         policy.create_tf_graph()
 
         with policy.graph.as_default():
-            with tf.variable_scope("optimizer/"):
+            with tf.variable_scope("XXoptimizer/XX"):
                 super().__init__(policy, trainer_params)
                 hyperparameters: PPOSettings = cast(
                     PPOSettings, trainer_params.hyperparameters

Mutant 600

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -40,7 +40,7 @@
 
                 self.stream_names = list(self.reward_signals.keys())
 
-                self.tf_optimizer_op: Optional[tf.train.Optimizer] = None
+                self.tf_optimizer_op: Optional[tf.train.Optimizer] = ""
                 self.grads = None
                 self.update_batch: Optional[tf.Operation] = None
 

Mutant 601

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -41,7 +41,7 @@
                 self.stream_names = list(self.reward_signals.keys())
 
                 self.tf_optimizer_op: Optional[tf.train.Optimizer] = None
-                self.grads = None
+                self.grads = ""
                 self.update_batch: Optional[tf.Operation] = None
 
                 self.stats_name_to_update_name = {

Mutant 602

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -42,7 +42,7 @@
 
                 self.tf_optimizer_op: Optional[tf.train.Optimizer] = None
                 self.grads = None
-                self.update_batch: Optional[tf.Operation] = None
+                self.update_batch: Optional[tf.Operation] = ""
 
                 self.stats_name_to_update_name = {
                     "Losses/Value Loss": "value_loss",

Mutant 603

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -45,7 +45,7 @@
                 self.update_batch: Optional[tf.Operation] = None
 
                 self.stats_name_to_update_name = {
-                    "Losses/Value Loss": "value_loss",
+                    "XXLosses/Value LossXX": "value_loss",
                     "Losses/Policy Loss": "policy_loss",
                     "Policy/Learning Rate": "learning_rate",
                     "Policy/Epsilon": "decay_epsilon",

Mutant 605

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -46,7 +46,7 @@
 
                 self.stats_name_to_update_name = {
                     "Losses/Value Loss": "value_loss",
-                    "Losses/Policy Loss": "policy_loss",
+                    "XXLosses/Policy LossXX": "policy_loss",
                     "Policy/Learning Rate": "learning_rate",
                     "Policy/Epsilon": "decay_epsilon",
                     "Policy/Beta": "decay_beta",

Mutant 607

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -47,7 +47,7 @@
                 self.stats_name_to_update_name = {
                     "Losses/Value Loss": "value_loss",
                     "Losses/Policy Loss": "policy_loss",
-                    "Policy/Learning Rate": "learning_rate",
+                    "XXPolicy/Learning RateXX": "learning_rate",
                     "Policy/Epsilon": "decay_epsilon",
                     "Policy/Beta": "decay_beta",
                 }

Mutant 609

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -48,7 +48,7 @@
                     "Losses/Value Loss": "value_loss",
                     "Losses/Policy Loss": "policy_loss",
                     "Policy/Learning Rate": "learning_rate",
-                    "Policy/Epsilon": "decay_epsilon",
+                    "XXPolicy/EpsilonXX": "decay_epsilon",
                     "Policy/Beta": "decay_beta",
                 }
                 if self.policy.use_recurrent:

Mutant 611

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -49,7 +49,7 @@
                     "Losses/Policy Loss": "policy_loss",
                     "Policy/Learning Rate": "learning_rate",
                     "Policy/Epsilon": "decay_epsilon",
-                    "Policy/Beta": "decay_beta",
+                    "XXPolicy/BetaXX": "decay_beta",
                 }
                 if self.policy.use_recurrent:
                     self.m_size = self.policy.m_size

Mutant 613

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -59,7 +59,7 @@
                         name="recurrent_value_in",
                     )
 
-                if num_layers < 1:
+                if num_layers <= 1:
                     num_layers = 1
                 if policy.use_continuous_act:
                     self._create_cc_critic(h_size, num_layers, vis_encode_type)

Mutant 614

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -59,7 +59,7 @@
                         name="recurrent_value_in",
                     )
 
-                if num_layers < 1:
+                if num_layers < 2:
                     num_layers = 1
                 if policy.use_continuous_act:
                     self._create_cc_critic(h_size, num_layers, vis_encode_type)

Mutant 615

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -71,7 +71,7 @@
                     lr,
                     self.policy.global_step,
                     int(max_step),
-                    min_value=1e-10,
+                    min_value=1.0000000001,
                 )
                 self._create_losses(
                     self.policy.total_log_probs,

Mutant 618

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -89,7 +89,7 @@
                 {
                     "value_loss": self.value_loss,
                     "policy_loss": self.abs_policy_loss,
-                    "update_batch": self.update_batch,
+                    "XXupdate_batchXX": self.update_batch,
                     "learning_rate": self.learning_rate,
                     "decay_epsilon": self.decay_epsilon,
                     "decay_beta": self.decay_beta,

Mutant 622

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -110,7 +110,7 @@
         hidden_stream = ModelUtils.create_observation_streams(
             self.policy.visual_in,
             self.policy.processed_vector_in,
-            1,
+            2,
             h_size,
             num_layers,
             vis_encode_type,

Mutant 626

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -133,7 +133,7 @@
         self.all_old_log_probs = tf.placeholder(
             shape=[None, sum(self.policy.act_size)],
             dtype=tf.float32,
-            name="old_probabilities",
+            name="XXold_probabilitiesXX",
         )
 
         self.old_log_probs = tf.reduce_sum(

Mutant 628

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -137,7 +137,7 @@
         )
 
         self.old_log_probs = tf.reduce_sum(
-            (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
+            (tf.identity(self.all_old_log_probs)), axis=1, keepdims=False
         )
 
     def _create_dc_critic(

Mutant 629

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -152,7 +152,7 @@
         hidden_stream = ModelUtils.create_observation_streams(
             self.policy.visual_in,
             self.policy.processed_vector_in,
-            1,
+            2,
             h_size,
             num_layers,
             vis_encode_type,

Mutant 633

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -176,7 +176,7 @@
         self.all_old_log_probs = tf.placeholder(
             shape=[None, sum(self.policy.act_size)],
             dtype=tf.float32,
-            name="old_probabilities",
+            name="XXold_probabilitiesXX",
         )
 
         # Break old log probs into separate branches

Mutant 634

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -188,7 +188,7 @@
             old_log_prob_branches, self.policy.action_masks, self.policy.act_size
         )
 
-        action_idx = [0] + list(np.cumsum(self.policy.act_size))
+        action_idx = [1] + list(np.cumsum(self.policy.act_size))
 
         self.old_log_probs = tf.reduce_sum(
             (

Mutant 649

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -237,7 +237,7 @@
             self.returns_holders[name] = returns_holder
             self.old_values[name] = old_value
         self.advantage = tf.placeholder(
-            shape=[None], dtype=tf.float32, name="advantages"
+            shape=[None], dtype=tf.float32, name="XXadvantagesXX"
         )
         advantage = tf.expand_dims(self.advantage, -1)
 

Mutant 650

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -239,7 +239,7 @@
         self.advantage = tf.placeholder(
             shape=[None], dtype=tf.float32, name="advantages"
         )
-        advantage = tf.expand_dims(self.advantage, -1)
+        advantage = tf.expand_dims(self.advantage, +1)
 
         self.decay_epsilon = ModelUtils.create_schedule(
             self._schedule, epsilon, self.policy.global_step, max_step, min_value=0.1

Mutant 653

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -242,7 +242,7 @@
         advantage = tf.expand_dims(self.advantage, -1)
 
         self.decay_epsilon = ModelUtils.create_schedule(
-            self._schedule, epsilon, self.policy.global_step, max_step, min_value=0.1
+            self._schedule, epsilon, self.policy.global_step, max_step, min_value=1.1
         )
         self.decay_beta = ModelUtils.create_schedule(
             self._schedule, beta, self.policy.global_step, max_step, min_value=1e-5

Mutant 654

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -245,7 +245,7 @@
             self._schedule, epsilon, self.policy.global_step, max_step, min_value=0.1
         )
         self.decay_beta = ModelUtils.create_schedule(
-            self._schedule, beta, self.policy.global_step, max_step, min_value=1e-5
+            self._schedule, beta, self.policy.global_step, max_step, min_value=1.00001
         )
 
         value_losses = []

Mutant 656

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -250,7 +250,7 @@
 
         value_losses = []
         for name, head in value_heads.items():
-            clipped_value_estimate = self.old_values[name] + tf.clip_by_value(
+            clipped_value_estimate = self.old_values[name] - tf.clip_by_value(
                 tf.reduce_sum(head, axis=1) - self.old_values[name],
                 -self.decay_epsilon,
                 self.decay_epsilon,

Mutant 658

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -251,7 +251,7 @@
         value_losses = []
         for name, head in value_heads.items():
             clipped_value_estimate = self.old_values[name] + tf.clip_by_value(
-                tf.reduce_sum(head, axis=1) - self.old_values[name],
+                tf.reduce_sum(head, axis=1) + self.old_values[name],
                 -self.decay_epsilon,
                 self.decay_epsilon,
             )

Mutant 661

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -262,7 +262,7 @@
                 self.returns_holders[name], clipped_value_estimate
             )
             value_loss = tf.reduce_mean(
-                tf.dynamic_partition(tf.maximum(v_opt_a, v_opt_b), self.policy.mask, 2)[
+                tf.dynamic_partition(tf.maximum(v_opt_a, v_opt_b), self.policy.mask, 3)[
                     1
                 ]
             )

Mutant 668

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -273,7 +273,7 @@
         p_opt_a = r_theta * advantage
         p_opt_b = (
             tf.clip_by_value(
-                r_theta, 1.0 - self.decay_epsilon, 1.0 + self.decay_epsilon
+                r_theta, 2.0 - self.decay_epsilon, 1.0 + self.decay_epsilon
             )
             * advantage
         )

Mutant 669

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -273,7 +273,7 @@
         p_opt_a = r_theta * advantage
         p_opt_b = (
             tf.clip_by_value(
-                r_theta, 1.0 - self.decay_epsilon, 1.0 + self.decay_epsilon
+                r_theta, 1.0 + self.decay_epsilon, 1.0 + self.decay_epsilon
             )
             * advantage
         )

Mutant 670

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -273,7 +273,7 @@
         p_opt_a = r_theta * advantage
         p_opt_b = (
             tf.clip_by_value(
-                r_theta, 1.0 - self.decay_epsilon, 1.0 + self.decay_epsilon
+                r_theta, 1.0 - self.decay_epsilon, 2.0 + self.decay_epsilon
             )
             * advantage
         )

Mutant 674

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -278,7 +278,7 @@
             * advantage
         )
         self.policy_loss = -tf.reduce_mean(
-            tf.dynamic_partition(tf.minimum(p_opt_a, p_opt_b), self.policy.mask, 2)[1]
+            tf.dynamic_partition(tf.minimum(p_opt_a, p_opt_b), self.policy.mask, 3)[1]
         )
         # For cleaner stats reporting
         self.abs_policy_loss = tf.abs(self.policy_loss)

Mutant 678

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -285,7 +285,7 @@
 
         self.loss = (
             self.policy_loss
-            + 0.5 * self.value_loss
+            + 1.5 * self.value_loss
             - self.decay_beta
             * tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
         )

Mutant 680

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -286,7 +286,7 @@
         self.loss = (
             self.policy_loss
             + 0.5 * self.value_loss
-            - self.decay_beta
+            + self.decay_beta
             * tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
         )
 

Mutant 681

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -287,7 +287,7 @@
             self.policy_loss
             + 0.5 * self.value_loss
             - self.decay_beta
-            * tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
+            / tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
         )
 
     def _create_ppo_optimizer_ops(self):

Mutant 682

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -287,7 +287,7 @@
             self.policy_loss
             + 0.5 * self.value_loss
             - self.decay_beta
-            * tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
+            * tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 3)[1])
         )
 
     def _create_ppo_optimizer_ops(self):

Mutant 685

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -292,7 +292,7 @@
 
     def _create_ppo_optimizer_ops(self):
         self.tf_optimizer_op = self.create_optimizer_op(self.learning_rate)
-        self.grads = self.tf_optimizer_op.compute_gradients(self.loss)
+        self.grads = None
         self.update_batch = self.tf_optimizer_op.minimize(self.loss)
 
     @timed

Mutant 687

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -295,7 +295,6 @@
         self.grads = self.tf_optimizer_op.compute_gradients(self.loss)
         self.update_batch = self.tf_optimizer_op.minimize(self.loss)
 
-    @timed
     def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
         """
         Performs update on model.

Mutant 693

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -322,7 +322,7 @@
         self, mini_batch: AgentBuffer, num_sequences: int
     ) -> Dict[tf.Tensor, Any]:
         # Do an optional burn-in for memories
-        num_burn_in = int(self.burn_in_ratio * self.policy.sequence_length)
+        num_burn_in = int(self.burn_in_ratio / self.policy.sequence_length)
         burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
         burn_in_mask[range(0, num_burn_in)] = 0
         burn_in_mask = np.tile(burn_in_mask, num_sequences)

Mutant 696

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -324,7 +324,7 @@
         # Do an optional burn-in for memories
         num_burn_in = int(self.burn_in_ratio * self.policy.sequence_length)
         burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
-        burn_in_mask[range(0, num_burn_in)] = 0
+        burn_in_mask[range(1, num_burn_in)] = 0
         burn_in_mask = np.tile(burn_in_mask, num_sequences)
         feed_dict = {
             self.policy.batch_size_ph: num_sequences,

Mutant 697

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -324,7 +324,7 @@
         # Do an optional burn-in for memories
         num_burn_in = int(self.burn_in_ratio * self.policy.sequence_length)
         burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
-        burn_in_mask[range(0, num_burn_in)] = 0
+        burn_in_mask[range(0, num_burn_in)] = 1
         burn_in_mask = np.tile(burn_in_mask, num_sequences)
         feed_dict = {
             self.policy.batch_size_ph: num_sequences,

Mutant 698

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -324,7 +324,7 @@
         # Do an optional burn-in for memories
         num_burn_in = int(self.burn_in_ratio * self.policy.sequence_length)
         burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
-        burn_in_mask[range(0, num_burn_in)] = 0
+        burn_in_mask[range(0, num_burn_in)] = None
         burn_in_mask = np.tile(burn_in_mask, num_sequences)
         feed_dict = {
             self.policy.batch_size_ph: num_sequences,

Mutant 701

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -329,7 +329,7 @@
         feed_dict = {
             self.policy.batch_size_ph: num_sequences,
             self.policy.sequence_length_ph: self.policy.sequence_length,
-            self.policy.mask_input: mini_batch["masks"] * burn_in_mask,
+            self.policy.mask_input: mini_batch["masks"] / burn_in_mask,
             self.advantage: mini_batch["advantages"],
             self.all_old_log_probs: mini_batch["action_probs"],
         }

Mutant 709

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -337,7 +337,7 @@
             feed_dict[self.returns_holders[name]] = mini_batch[f"{name}_returns"]
             feed_dict[self.old_values[name]] = mini_batch[f"{name}_value_estimates"]
 
-        if self.policy.output_pre is not None and "actions_pre" in mini_batch:
+        if self.policy.output_pre is not None or "actions_pre" in mini_batch:
             feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]
         else:
             feed_dict[self.policy.output] = mini_batch["actions"]

Mutant 720

--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -346,7 +346,7 @@
             feed_dict[self.policy.action_masks] = mini_batch["action_mask"]
         if "vector_obs" in mini_batch:
             feed_dict[self.policy.vector_in] = mini_batch["vector_obs"]
-        if self.policy.vis_obs_size > 0:
+        if self.policy.vis_obs_size >= 0:
             for i, _ in enumerate(self.policy.visual_in):
                 feed_dict[self.policy.visual_in[i]] = mini_batch["visual_obs%d" % i]
         if self.policy.use_recurrent: