ml-agents/mlagents/trainers/ppo/optimizer.py
Killed 96 out of 138 mutants
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 505
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -21,7 +21,7 @@
policy.create_tf_graph()
with policy.graph.as_default():
- with tf.variable_scope("optimizer/"):
+ with tf.variable_scope("XXoptimizer/XX"):
super().__init__(policy, trainer_params)
hyperparameters: PPOSettings = cast(
PPOSettings, trainer_params.hyperparameters
Mutant 518
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -40,7 +40,7 @@
self.stream_names = list(self.reward_signals.keys())
- self.tf_optimizer_op: Optional[tf.train.Optimizer] = None
+ self.tf_optimizer_op: Optional[tf.train.Optimizer] = ""
self.grads = None
self.update_batch: Optional[tf.Operation] = None
Mutant 519
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -41,7 +41,7 @@
self.stream_names = list(self.reward_signals.keys())
self.tf_optimizer_op: Optional[tf.train.Optimizer] = None
- self.grads = None
+ self.grads = ""
self.update_batch: Optional[tf.Operation] = None
self.stats_name_to_update_name = {
Mutant 520
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -42,7 +42,7 @@
self.tf_optimizer_op: Optional[tf.train.Optimizer] = None
self.grads = None
- self.update_batch: Optional[tf.Operation] = None
+ self.update_batch: Optional[tf.Operation] = ""
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
Mutant 521
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -45,7 +45,7 @@
self.update_batch: Optional[tf.Operation] = None
self.stats_name_to_update_name = {
- "Losses/Value Loss": "value_loss",
+ "XXLosses/Value LossXX": "value_loss",
"Losses/Policy Loss": "policy_loss",
"Policy/Learning Rate": "learning_rate",
"Policy/Epsilon": "decay_epsilon",
Mutant 523
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -46,7 +46,7 @@
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
- "Losses/Policy Loss": "policy_loss",
+ "XXLosses/Policy LossXX": "policy_loss",
"Policy/Learning Rate": "learning_rate",
"Policy/Epsilon": "decay_epsilon",
"Policy/Beta": "decay_beta",
Mutant 525
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -47,7 +47,7 @@
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",
- "Policy/Learning Rate": "learning_rate",
+ "XXPolicy/Learning RateXX": "learning_rate",
"Policy/Epsilon": "decay_epsilon",
"Policy/Beta": "decay_beta",
}
Mutant 527
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -48,7 +48,7 @@
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",
"Policy/Learning Rate": "learning_rate",
- "Policy/Epsilon": "decay_epsilon",
+ "XXPolicy/EpsilonXX": "decay_epsilon",
"Policy/Beta": "decay_beta",
}
if self.policy.use_recurrent:
Mutant 529
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -49,7 +49,7 @@
"Losses/Policy Loss": "policy_loss",
"Policy/Learning Rate": "learning_rate",
"Policy/Epsilon": "decay_epsilon",
- "Policy/Beta": "decay_beta",
+ "XXPolicy/BetaXX": "decay_beta",
}
if self.policy.use_recurrent:
self.m_size = self.policy.m_size
Mutant 531
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -59,7 +59,7 @@
name="recurrent_value_in",
)
- if num_layers < 1:
+ if num_layers <= 1:
num_layers = 1
if policy.use_continuous_act:
self._create_cc_critic(h_size, num_layers, vis_encode_type)
Mutant 532
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -59,7 +59,7 @@
name="recurrent_value_in",
)
- if num_layers < 1:
+ if num_layers < 2:
num_layers = 1
if policy.use_continuous_act:
self._create_cc_critic(h_size, num_layers, vis_encode_type)
Mutant 533
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -71,7 +71,7 @@
lr,
self.policy.global_step,
int(max_step),
- min_value=1e-10,
+ min_value=1.0000000001,
)
self._create_losses(
self.policy.total_log_probs,
Mutant 536
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -89,7 +89,7 @@
{
"value_loss": self.value_loss,
"policy_loss": self.abs_policy_loss,
- "update_batch": self.update_batch,
+ "XXupdate_batchXX": self.update_batch,
"learning_rate": self.learning_rate,
"decay_epsilon": self.decay_epsilon,
"decay_beta": self.decay_beta,
Mutant 540
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -110,7 +110,7 @@
hidden_stream = ModelUtils.create_observation_streams(
self.policy.visual_in,
self.policy.processed_vector_in,
- 1,
+ 2,
h_size,
num_layers,
vis_encode_type,
Mutant 544
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -133,7 +133,7 @@
self.all_old_log_probs = tf.placeholder(
shape=[None, sum(self.policy.act_size)],
dtype=tf.float32,
- name="old_probabilities",
+ name="XXold_probabilitiesXX",
)
self.old_log_probs = tf.reduce_sum(
Mutant 546
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -137,7 +137,7 @@
)
self.old_log_probs = tf.reduce_sum(
- (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
+ (tf.identity(self.all_old_log_probs)), axis=1, keepdims=False
)
def _create_dc_critic(
Mutant 547
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -152,7 +152,7 @@
hidden_stream = ModelUtils.create_observation_streams(
self.policy.visual_in,
self.policy.processed_vector_in,
- 1,
+ 2,
h_size,
num_layers,
vis_encode_type,
Mutant 551
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -176,7 +176,7 @@
self.all_old_log_probs = tf.placeholder(
shape=[None, sum(self.policy.act_size)],
dtype=tf.float32,
- name="old_probabilities",
+ name="XXold_probabilitiesXX",
)
# Break old log probs into separate branches
Mutant 552
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -188,7 +188,7 @@
old_log_prob_branches, self.policy.action_masks, self.policy.act_size
)
- action_idx = [0] + list(np.cumsum(self.policy.act_size))
+ action_idx = [1] + list(np.cumsum(self.policy.act_size))
self.old_log_probs = tf.reduce_sum(
(
Mutant 567
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -237,7 +237,7 @@
self.returns_holders[name] = returns_holder
self.old_values[name] = old_value
self.advantage = tf.placeholder(
- shape=[None], dtype=tf.float32, name="advantages"
+ shape=[None], dtype=tf.float32, name="XXadvantagesXX"
)
advantage = tf.expand_dims(self.advantage, -1)
Mutant 568
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -239,7 +239,7 @@
self.advantage = tf.placeholder(
shape=[None], dtype=tf.float32, name="advantages"
)
- advantage = tf.expand_dims(self.advantage, -1)
+ advantage = tf.expand_dims(self.advantage, +1)
self.decay_epsilon = ModelUtils.create_schedule(
self._schedule, epsilon, self.policy.global_step, max_step, min_value=0.1
Mutant 571
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -242,7 +242,7 @@
advantage = tf.expand_dims(self.advantage, -1)
self.decay_epsilon = ModelUtils.create_schedule(
- self._schedule, epsilon, self.policy.global_step, max_step, min_value=0.1
+ self._schedule, epsilon, self.policy.global_step, max_step, min_value=1.1
)
self.decay_beta = ModelUtils.create_schedule(
self._schedule, beta, self.policy.global_step, max_step, min_value=1e-5
Mutant 574
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -250,7 +250,7 @@
value_losses = []
for name, head in value_heads.items():
- clipped_value_estimate = self.old_values[name] + tf.clip_by_value(
+ clipped_value_estimate = self.old_values[name] - tf.clip_by_value(
tf.reduce_sum(head, axis=1) - self.old_values[name],
-self.decay_epsilon,
self.decay_epsilon,
Mutant 576
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -251,7 +251,7 @@
value_losses = []
for name, head in value_heads.items():
clipped_value_estimate = self.old_values[name] + tf.clip_by_value(
- tf.reduce_sum(head, axis=1) - self.old_values[name],
+ tf.reduce_sum(head, axis=1) + self.old_values[name],
-self.decay_epsilon,
self.decay_epsilon,
)
Mutant 579
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -262,7 +262,7 @@
self.returns_holders[name], clipped_value_estimate
)
value_loss = tf.reduce_mean(
- tf.dynamic_partition(tf.maximum(v_opt_a, v_opt_b), self.policy.mask, 2)[
+ tf.dynamic_partition(tf.maximum(v_opt_a, v_opt_b), self.policy.mask, 3)[
1
]
)
Mutant 586
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -273,7 +273,7 @@
p_opt_a = r_theta * advantage
p_opt_b = (
tf.clip_by_value(
- r_theta, 1.0 - self.decay_epsilon, 1.0 + self.decay_epsilon
+ r_theta, 2.0 - self.decay_epsilon, 1.0 + self.decay_epsilon
)
* advantage
)
Mutant 587
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -273,7 +273,7 @@
p_opt_a = r_theta * advantage
p_opt_b = (
tf.clip_by_value(
- r_theta, 1.0 - self.decay_epsilon, 1.0 + self.decay_epsilon
+ r_theta, 1.0 + self.decay_epsilon, 1.0 + self.decay_epsilon
)
* advantage
)
Mutant 588
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -273,7 +273,7 @@
p_opt_a = r_theta * advantage
p_opt_b = (
tf.clip_by_value(
- r_theta, 1.0 - self.decay_epsilon, 1.0 + self.decay_epsilon
+ r_theta, 1.0 - self.decay_epsilon, 2.0 + self.decay_epsilon
)
* advantage
)
Mutant 592
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -278,7 +278,7 @@
* advantage
)
self.policy_loss = -tf.reduce_mean(
- tf.dynamic_partition(tf.minimum(p_opt_a, p_opt_b), self.policy.mask, 2)[1]
+ tf.dynamic_partition(tf.minimum(p_opt_a, p_opt_b), self.policy.mask, 3)[1]
)
# For cleaner stats reporting
self.abs_policy_loss = tf.abs(self.policy_loss)
Mutant 596
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -285,7 +285,7 @@
self.loss = (
self.policy_loss
- + 0.5 * self.value_loss
+ + 1.5 * self.value_loss
- self.decay_beta
* tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
)
Mutant 598
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -286,7 +286,7 @@
self.loss = (
self.policy_loss
+ 0.5 * self.value_loss
- - self.decay_beta
+ + self.decay_beta
* tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
)
Mutant 599
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -287,7 +287,7 @@
self.policy_loss
+ 0.5 * self.value_loss
- self.decay_beta
- * tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
+ / tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
)
def _create_ppo_optimizer_ops(self):
Mutant 600
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -287,7 +287,7 @@
self.policy_loss
+ 0.5 * self.value_loss
- self.decay_beta
- * tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
+ * tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 3)[1])
)
def _create_ppo_optimizer_ops(self):
Mutant 603
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -292,7 +292,7 @@
def _create_ppo_optimizer_ops(self):
self.tf_optimizer_op = self.create_optimizer_op(self.learning_rate)
- self.grads = self.tf_optimizer_op.compute_gradients(self.loss)
+ self.grads = None
self.update_batch = self.tf_optimizer_op.minimize(self.loss)
@timed
Mutant 605
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -295,7 +295,6 @@
self.grads = self.tf_optimizer_op.compute_gradients(self.loss)
self.update_batch = self.tf_optimizer_op.minimize(self.loss)
- @timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
"""
Performs update on model.
Mutant 611
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -322,7 +322,7 @@
self, mini_batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:
# Do an optional burn-in for memories
- num_burn_in = int(self.burn_in_ratio * self.policy.sequence_length)
+ num_burn_in = int(self.burn_in_ratio / self.policy.sequence_length)
burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
burn_in_mask[range(0, num_burn_in)] = 0
burn_in_mask = np.tile(burn_in_mask, num_sequences)
Mutant 614
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -324,7 +324,7 @@
# Do an optional burn-in for memories
num_burn_in = int(self.burn_in_ratio * self.policy.sequence_length)
burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
- burn_in_mask[range(0, num_burn_in)] = 0
+ burn_in_mask[range(1, num_burn_in)] = 0
burn_in_mask = np.tile(burn_in_mask, num_sequences)
feed_dict = {
self.policy.batch_size_ph: num_sequences,
Mutant 615
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -324,7 +324,7 @@
# Do an optional burn-in for memories
num_burn_in = int(self.burn_in_ratio * self.policy.sequence_length)
burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
- burn_in_mask[range(0, num_burn_in)] = 0
+ burn_in_mask[range(0, num_burn_in)] = 1
burn_in_mask = np.tile(burn_in_mask, num_sequences)
feed_dict = {
self.policy.batch_size_ph: num_sequences,
Mutant 616
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -324,7 +324,7 @@
# Do an optional burn-in for memories
num_burn_in = int(self.burn_in_ratio * self.policy.sequence_length)
burn_in_mask = np.ones((self.policy.sequence_length), dtype=np.float32)
- burn_in_mask[range(0, num_burn_in)] = 0
+ burn_in_mask[range(0, num_burn_in)] = None
burn_in_mask = np.tile(burn_in_mask, num_sequences)
feed_dict = {
self.policy.batch_size_ph: num_sequences,
Mutant 619
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -329,7 +329,7 @@
feed_dict = {
self.policy.batch_size_ph: num_sequences,
self.policy.sequence_length_ph: self.policy.sequence_length,
- self.policy.mask_input: mini_batch["masks"] * burn_in_mask,
+ self.policy.mask_input: mini_batch["masks"] / burn_in_mask,
self.advantage: mini_batch["advantages"],
self.all_old_log_probs: mini_batch["action_probs"],
}
Mutant 627
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -337,7 +337,7 @@
feed_dict[self.returns_holders[name]] = mini_batch[f"{name}_returns"]
feed_dict[self.old_values[name]] = mini_batch[f"{name}_value_estimates"]
- if self.policy.output_pre is not None and "actions_pre" in mini_batch:
+ if self.policy.output_pre is not None or "actions_pre" in mini_batch:
feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]
else:
feed_dict[self.policy.output] = mini_batch["actions"]
Mutant 638
--- ml-agents/mlagents/trainers/ppo/optimizer.py
+++ ml-agents/mlagents/trainers/ppo/optimizer.py
@@ -346,7 +346,7 @@
feed_dict[self.policy.action_masks] = mini_batch["action_mask"]
if "vector_obs" in mini_batch:
feed_dict[self.policy.vector_in] = mini_batch["vector_obs"]
- if self.policy.vis_obs_size > 0:
+ if self.policy.vis_obs_size >= 0:
for i, _ in enumerate(self.policy.visual_in):
feed_dict[self.policy.visual_in[i]] = mini_batch["visual_obs%d" % i]
if self.policy.use_recurrent: