ml-agents/mlagents/trainers/sac/network.py
Killed 88 out of 172 mutants
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 80
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -3,7 +3,7 @@
from mlagents.trainers.tf.models import ModelUtils
from mlagents.trainers.settings import EncoderType
-LOG_STD_MAX = 2
+LOG_STD_MAX = 3
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
Mutant 81
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -3,7 +3,7 @@
from mlagents.trainers.tf.models import ModelUtils
from mlagents.trainers.settings import EncoderType
-LOG_STD_MAX = 2
+LOG_STD_MAX = None
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
Mutant 82
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
from mlagents.trainers.settings import EncoderType
LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = +20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
Mutant 83
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
from mlagents.trainers.settings import EncoderType
LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = -21
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
Mutant 84
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
from mlagents.trainers.settings import EncoderType
LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = None
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
Mutant 85
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -5,7 +5,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
-EPSILON = 1e-6 # Small value to avoid divide by zero
+EPSILON = 1.000001 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
Mutant 86
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -5,7 +5,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
-EPSILON = 1e-6 # Small value to avoid divide by zero
+EPSILON = None # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
Mutant 87
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -6,7 +6,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
+DISCRETE_TARGET_ENTROPY_SCALE = 1.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 88
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -6,7 +6,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
+DISCRETE_TARGET_ENTROPY_SCALE = None # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 89
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -7,7 +7,7 @@
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
+CONTINUOUS_TARGET_ENTROPY_SCALE = 2.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 90
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -7,7 +7,7 @@
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
+CONTINUOUS_TARGET_ENTROPY_SCALE = None # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 91
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -8,7 +8,7 @@
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
-POLICY_SCOPE = ""
+POLICY_SCOPE = "XXXX"
TARGET_SCOPE = "target_network"
Mutant 93
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -9,7 +9,7 @@
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
-TARGET_SCOPE = "target_network"
+TARGET_SCOPE = "XXtarget_networkXX"
class SACNetwork:
Mutant 95
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -21,7 +21,7 @@
self,
policy=None,
m_size=None,
- h_size=128,
+ h_size=129,
normalize=False,
use_recurrent=False,
num_layers=2,
Mutant 96
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -22,7 +22,7 @@
policy=None,
m_size=None,
h_size=128,
- normalize=False,
+ normalize=True,
use_recurrent=False,
num_layers=2,
stream_names=None,
Mutant 97
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -23,7 +23,7 @@
m_size=None,
h_size=128,
normalize=False,
- use_recurrent=False,
+ use_recurrent=True,
num_layers=2,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
Mutant 98
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -24,7 +24,7 @@
h_size=128,
normalize=False,
use_recurrent=False,
- num_layers=2,
+ num_layers=3,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
Mutant 99
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -28,7 +28,7 @@
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
- self.normalize = normalize
+ self.normalize = None
self.use_recurrent = use_recurrent
self.num_layers = num_layers
self.stream_names = stream_names
Mutant 105
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -36,7 +36,7 @@
self.activ_fn = ModelUtils.swish
self.sequence_length_ph = tf.placeholder(
- shape=None, dtype=tf.int32, name="sac_sequence_length"
+ shape=None, dtype=tf.int32, name="XXsac_sequence_lengthXX"
)
self.policy_memory_in: Optional[tf.Tensor] = None
Mutant 106
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -39,7 +39,7 @@
shape=None, dtype=tf.int32, name="sac_sequence_length"
)
- self.policy_memory_in: Optional[tf.Tensor] = None
+ self.policy_memory_in: Optional[tf.Tensor] = ""
self.policy_memory_out: Optional[tf.Tensor] = None
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
Mutant 107
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -40,7 +40,7 @@
)
self.policy_memory_in: Optional[tf.Tensor] = None
- self.policy_memory_out: Optional[tf.Tensor] = None
+ self.policy_memory_out: Optional[tf.Tensor] = ""
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
Mutant 108
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -41,7 +41,7 @@
self.policy_memory_in: Optional[tf.Tensor] = None
self.policy_memory_out: Optional[tf.Tensor] = None
- self.value_memory_in: Optional[tf.Tensor] = None
+ self.value_memory_in: Optional[tf.Tensor] = ""
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
Mutant 109
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -42,7 +42,7 @@
self.policy_memory_in: Optional[tf.Tensor] = None
self.policy_memory_out: Optional[tf.Tensor] = None
self.value_memory_in: Optional[tf.Tensor] = None
- self.value_memory_out: Optional[tf.Tensor] = None
+ self.value_memory_out: Optional[tf.Tensor] = ""
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
Mutant 110
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -43,7 +43,7 @@
self.policy_memory_out: Optional[tf.Tensor] = None
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
- self.q1: Optional[tf.Tensor] = None
+ self.q1: Optional[tf.Tensor] = ""
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
Mutant 111
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -44,7 +44,7 @@
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
- self.q2: Optional[tf.Tensor] = None
+ self.q2: Optional[tf.Tensor] = ""
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
Mutant 112
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -45,7 +45,7 @@
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
- self.q1_p: Optional[tf.Tensor] = None
+ self.q1_p: Optional[tf.Tensor] = ""
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
Mutant 113
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -46,7 +46,7 @@
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
- self.q2_p: Optional[tf.Tensor] = None
+ self.q2_p: Optional[tf.Tensor] = ""
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
Mutant 114
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -47,7 +47,7 @@
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
- self.q1_memory_in: Optional[tf.Tensor] = None
+ self.q1_memory_in: Optional[tf.Tensor] = ""
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
Mutant 115
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -48,7 +48,7 @@
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
- self.q2_memory_in: Optional[tf.Tensor] = None
+ self.q2_memory_in: Optional[tf.Tensor] = ""
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
Mutant 116
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -49,7 +49,7 @@
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
- self.q1_memory_out: Optional[tf.Tensor] = None
+ self.q1_memory_out: Optional[tf.Tensor] = ""
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
Mutant 117
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -50,7 +50,7 @@
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
- self.q2_memory_out: Optional[tf.Tensor] = None
+ self.q2_memory_out: Optional[tf.Tensor] = ""
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
Mutant 118
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -51,7 +51,7 @@
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
- self.prev_action: Optional[tf.Tensor] = None
+ self.prev_action: Optional[tf.Tensor] = ""
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
Mutant 119
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -52,7 +52,7 @@
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
- self.action_masks: Optional[tf.Tensor] = None
+ self.action_masks: Optional[tf.Tensor] = ""
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
Mutant 120
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -53,7 +53,7 @@
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
- self.external_action_in: Optional[tf.Tensor] = None
+ self.external_action_in: Optional[tf.Tensor] = ""
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
Mutant 121
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -54,7 +54,7 @@
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
- self.log_sigma_sq: Optional[tf.Tensor] = None
+ self.log_sigma_sq: Optional[tf.Tensor] = ""
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
Mutant 122
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -55,7 +55,7 @@
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
- self.entropy: Optional[tf.Tensor] = None
+ self.entropy: Optional[tf.Tensor] = ""
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
Mutant 123
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -56,7 +56,7 @@
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
- self.deterministic_output: Optional[tf.Tensor] = None
+ self.deterministic_output: Optional[tf.Tensor] = ""
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
self.output_oh: Optional[tf.Tensor] = None
Mutant 124
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -57,7 +57,7 @@
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
- self.normalized_logprobs: Optional[tf.Tensor] = None
+ self.normalized_logprobs: Optional[tf.Tensor] = ""
self.action_probs: Optional[tf.Tensor] = None
self.output_oh: Optional[tf.Tensor] = None
self.output_pre: Optional[tf.Tensor] = None
Mutant 125
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -58,7 +58,7 @@
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
- self.action_probs: Optional[tf.Tensor] = None
+ self.action_probs: Optional[tf.Tensor] = ""
self.output_oh: Optional[tf.Tensor] = None
self.output_pre: Optional[tf.Tensor] = None
Mutant 126
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -59,7 +59,7 @@
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
- self.output_oh: Optional[tf.Tensor] = None
+ self.output_oh: Optional[tf.Tensor] = ""
self.output_pre: Optional[tf.Tensor] = None
self.value_vars = None
Mutant 127
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -60,7 +60,7 @@
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
self.output_oh: Optional[tf.Tensor] = None
- self.output_pre: Optional[tf.Tensor] = None
+ self.output_pre: Optional[tf.Tensor] = ""
self.value_vars = None
self.q_vars = None
Mutant 128
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -62,7 +62,7 @@
self.output_oh: Optional[tf.Tensor] = None
self.output_pre: Optional[tf.Tensor] = None
- self.value_vars = None
+ self.value_vars = ""
self.q_vars = None
self.critic_vars = None
self.policy_vars = None
Mutant 129
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -63,7 +63,7 @@
self.output_pre: Optional[tf.Tensor] = None
self.value_vars = None
- self.q_vars = None
+ self.q_vars = ""
self.critic_vars = None
self.policy_vars = None
Mutant 130
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -64,7 +64,7 @@
self.value_vars = None
self.q_vars = None
- self.critic_vars = None
+ self.critic_vars = ""
self.policy_vars = None
self.q1_heads: Dict[str, tf.Tensor] = None
Mutant 131
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -65,7 +65,7 @@
self.value_vars = None
self.q_vars = None
self.critic_vars = None
- self.policy_vars = None
+ self.policy_vars = ""
self.q1_heads: Dict[str, tf.Tensor] = None
self.q2_heads: Dict[str, tf.Tensor] = None
Mutant 132
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -67,7 +67,7 @@
self.critic_vars = None
self.policy_vars = None
- self.q1_heads: Dict[str, tf.Tensor] = None
+ self.q1_heads: Dict[str, tf.Tensor] = ""
self.q2_heads: Dict[str, tf.Tensor] = None
self.q1_pheads: Dict[str, tf.Tensor] = None
self.q2_pheads: Dict[str, tf.Tensor] = None
Mutant 133
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -68,7 +68,7 @@
self.policy_vars = None
self.q1_heads: Dict[str, tf.Tensor] = None
- self.q2_heads: Dict[str, tf.Tensor] = None
+ self.q2_heads: Dict[str, tf.Tensor] = ""
self.q1_pheads: Dict[str, tf.Tensor] = None
self.q2_pheads: Dict[str, tf.Tensor] = None
Mutant 134
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -69,7 +69,7 @@
self.q1_heads: Dict[str, tf.Tensor] = None
self.q2_heads: Dict[str, tf.Tensor] = None
- self.q1_pheads: Dict[str, tf.Tensor] = None
+ self.q1_pheads: Dict[str, tf.Tensor] = ""
self.q2_pheads: Dict[str, tf.Tensor] = None
self.policy = policy
Mutant 135
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -70,7 +70,7 @@
self.q1_heads: Dict[str, tf.Tensor] = None
self.q2_heads: Dict[str, tf.Tensor] = None
self.q1_pheads: Dict[str, tf.Tensor] = None
- self.q2_pheads: Dict[str, tf.Tensor] = None
+ self.q2_pheads: Dict[str, tf.Tensor] = ""
self.policy = policy
Mutant 138
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -84,7 +84,7 @@
"""
if not scope_1:
return scope_2
- if not scope_2:
+ if scope_2:
return scope_1
else:
return "/".join(filter(None, [scope_1, scope_2]))
Mutant 139
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -87,7 +87,7 @@
if not scope_2:
return scope_1
else:
- return "/".join(filter(None, [scope_1, scope_2]))
+ return "XX/XX".join(filter(None, [scope_1, scope_2]))
def create_value_heads(self, stream_names, hidden_input):
"""
Mutant 144
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -102,7 +102,7 @@
for name in stream_names:
value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
self.value_heads[name] = value
- self.value = tf.reduce_mean(list(self.value_heads.values()), 0)
+ self.value = tf.reduce_mean(list(self.value_heads.values()), 1)
def _create_cc_critic(self, hidden_value, scope, create_qs=True):
"""
Mutant 147
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -108,7 +108,7 @@
"""
Creates just the critic network
"""
- scope = self.join_scopes(scope, "critic")
+ scope = self.join_scopes(scope, "XXcriticXX")
self.create_sac_value_head(
self.stream_names,
hidden_value,
Mutant 151
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -119,7 +119,7 @@
self.external_action_in = tf.placeholder(
shape=[None, self.policy.act_size[0]],
dtype=tf.float32,
- name="external_action_in",
+ name="XXexternal_action_inXX",
)
self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
if create_qs:
Mutant 154
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -123,7 +123,7 @@
)
self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
if create_qs:
- hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
+ hidden_q = tf.concat([hidden_value, self.external_action_in], axis=+1)
hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
self.stream_names,
Mutant 157
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -124,7 +124,7 @@
self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
if create_qs:
hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
- hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
+ hidden_qp = tf.concat([hidden_value, self.policy.output], axis=+1)
self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
self.stream_names,
hidden_q,
Mutant 163
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -140,7 +140,7 @@
self.join_scopes(scope, "q"),
reuse=True,
)
- self.q_vars = self.get_vars(self.join_scopes(scope, "q"))
+ self.q_vars = self.get_vars(self.join_scopes(scope, "XXqXX"))
self.critic_vars = self.get_vars(scope)
def _create_dc_critic(self, hidden_value, scope, create_qs=True):
Mutant 169
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -153,7 +153,7 @@
hidden_value,
self.num_layers,
self.h_size,
- self.join_scopes(scope, "value"),
+ self.join_scopes(scope, "XXvalueXX"),
)
self.value_vars = self.get_vars("/".join([scope, "value"]))
Mutant 170
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -156,7 +156,7 @@
self.join_scopes(scope, "value"),
)
- self.value_vars = self.get_vars("/".join([scope, "value"]))
+ self.value_vars = self.get_vars("XX/XX".join([scope, "value"]))
if create_qs:
self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
Mutant 171
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -156,7 +156,7 @@
self.join_scopes(scope, "value"),
)
- self.value_vars = self.get_vars("/".join([scope, "value"]))
+ self.value_vars = self.get_vars("/".join([scope, "XXvalueXX"]))
if create_qs:
self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
Mutant 178
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -195,7 +195,7 @@
"""
with tf.variable_scope(scope):
value_hidden = ModelUtils.create_vector_observation_encoder(
- hidden_input, h_size, self.activ_fn, num_layers, "encoder", False
+ hidden_input, h_size, self.activ_fn, num_layers, "XXencoderXX", False
)
if self.use_recurrent:
value_hidden, memory_out = ModelUtils.create_recurrent_encoder(
Mutant 180
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -202,7 +202,7 @@
value_hidden,
self.value_memory_in,
self.sequence_length_ph,
- name="lstm_value",
+ name="XXlstm_valueXX",
)
self.value_memory_out = memory_out
self.create_value_heads(stream_names, value_hidden)
Mutant 182
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -230,7 +230,7 @@
:param reuse: Whether or not to reuse variables. Useful for creating Q of policy.
:param num_outputs: Number of outputs of each Q function. If discrete, equal to number of actions.
"""
- with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
+ with tf.variable_scope(self.join_scopes(scope, "XXq1_encodingXX"), reuse=reuse):
q1_hidden = ModelUtils.create_vector_observation_encoder(
hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
)
Mutant 183
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -232,7 +232,7 @@
"""
with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
q1_hidden = ModelUtils.create_vector_observation_encoder(
- hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
+ hidden_input, h_size, self.activ_fn, num_layers, "XXq1_encoderXX", reuse
)
if self.use_recurrent:
q1_hidden, memory_out = ModelUtils.create_recurrent_encoder(
Mutant 184
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -239,7 +239,7 @@
q1_hidden,
self.q1_memory_in,
self.sequence_length_ph,
- name="lstm_q1",
+ name="XXlstm_q1XX",
)
self.q1_memory_out = memory_out
Mutant 191
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -249,7 +249,7 @@
q1_heads[name] = _q1
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
- with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
+ with tf.variable_scope(self.join_scopes(scope, "XXq2_encodingXX"), reuse=reuse):
q2_hidden = ModelUtils.create_vector_observation_encoder(
hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
)
Mutant 192
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -251,7 +251,7 @@
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
q2_hidden = ModelUtils.create_vector_observation_encoder(
- hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
+ hidden_input, h_size, self.activ_fn, num_layers, "XXq2_encoderXX", reuse
)
if self.use_recurrent:
q2_hidden, memory_out = ModelUtils.create_recurrent_encoder(
Mutant 193
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -258,7 +258,7 @@
q2_hidden,
self.q2_memory_in,
self.sequence_length_ph,
- name="lstm_q2",
+ name="XXlstm_q2XX",
)
self.q2_memory_out = memory_out
Mutant 198
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -267,7 +267,7 @@
_q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
q2_heads[name] = _q2
- q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
+ q2 = tf.reduce_mean(list(q2_heads.values()), axis=1)
return q1_heads, q2_heads, q1, q2
Mutant 199
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -267,7 +267,7 @@
_q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
q2_heads[name] = _q2
- q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
+ q2 = None
return q1_heads, q2_heads, q1, q2
Mutant 200
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -282,7 +282,7 @@
self,
policy,
m_size=None,
- h_size=128,
+ h_size=129,
normalize=False,
use_recurrent=False,
num_layers=2,
Mutant 201
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -283,7 +283,7 @@
policy,
m_size=None,
h_size=128,
- normalize=False,
+ normalize=True,
use_recurrent=False,
num_layers=2,
stream_names=None,
Mutant 202
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -284,7 +284,7 @@
m_size=None,
h_size=128,
normalize=False,
- use_recurrent=False,
+ use_recurrent=True,
num_layers=2,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
Mutant 203
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -285,7 +285,7 @@
h_size=128,
normalize=False,
use_recurrent=False,
- num_layers=2,
+ num_layers=3,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
Mutant 205
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -317,7 +317,7 @@
)
else:
self.processed_vector_in = self.vector_in
- self.update_normalization_op = None
+ self.update_normalization_op = ""
if self.policy.use_recurrent:
self.memory_in = tf.placeholder(
Mutant 206
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -321,7 +321,7 @@
if self.policy.use_recurrent:
self.memory_in = tf.placeholder(
- shape=[None, m_size], dtype=tf.float32, name="target_recurrent_in"
+ shape=[None, m_size], dtype=tf.float32, name="XXtarget_recurrent_inXX"
)
self.value_memory_in = self.memory_in
hidden_streams = ModelUtils.create_observation_streams(
Mutant 215
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -339,7 +339,7 @@
self._create_dc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False)
if self.use_recurrent:
self.memory_out = tf.concat(
- self.value_memory_out, axis=1
+ self.value_memory_out, axis=2
) # Needed for Barracuda to work
def copy_normalization(self, mean, variance, steps):
Mutant 216
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -367,7 +367,7 @@
self,
policy,
m_size=None,
- h_size=128,
+ h_size=129,
normalize=False,
use_recurrent=False,
num_layers=2,
Mutant 217
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -368,7 +368,7 @@
policy,
m_size=None,
h_size=128,
- normalize=False,
+ normalize=True,
use_recurrent=False,
num_layers=2,
stream_names=None,
Mutant 218
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -369,7 +369,7 @@
m_size=None,
h_size=128,
normalize=False,
- use_recurrent=False,
+ use_recurrent=True,
num_layers=2,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
Mutant 219
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -370,7 +370,7 @@
h_size=128,
normalize=False,
use_recurrent=False,
- num_layers=2,
+ num_layers=3,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
Mutant 227
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -407,7 +407,7 @@
:param m_size: the total size of the memory.
"""
self.memory_in = tf.placeholder(
- shape=[None, m_size * 3], dtype=tf.float32, name="value_recurrent_in"
+ shape=[None, m_size * 3], dtype=tf.float32, name="XXvalue_recurrent_inXX"
)
# Re-break-up for each network
Mutant 241
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -418,7 +418,7 @@
_start = input_size // num_mems * i
_end = input_size // num_mems * (i + 1)
mem_ins.append(self.memory_in[:, _start:_end])
- self.value_memory_in = mem_ins[0]
+ self.value_memory_in = mem_ins[1]
self.q1_memory_in = mem_ins[1]
self.q2_memory_in = mem_ins[2]
Mutant 243
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -419,7 +419,7 @@
_end = input_size // num_mems * (i + 1)
mem_ins.append(self.memory_in[:, _start:_end])
self.value_memory_in = mem_ins[0]
- self.q1_memory_in = mem_ins[1]
+ self.q1_memory_in = mem_ins[2]
self.q2_memory_in = mem_ins[2]
def _create_observation_in(self, vis_encode_type):