ml-agents/mlagents/trainers/sac/network.py
Killed 88 out of 172 mutants
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 254
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -3,7 +3,7 @@
from mlagents.trainers.tf.models import ModelUtils
from mlagents.trainers.settings import EncoderType
-LOG_STD_MAX = 2
+LOG_STD_MAX = 3
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
Mutant 255
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -3,7 +3,7 @@
from mlagents.trainers.tf.models import ModelUtils
from mlagents.trainers.settings import EncoderType
-LOG_STD_MAX = 2
+LOG_STD_MAX = None
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
Mutant 256
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
from mlagents.trainers.settings import EncoderType
LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = +20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
Mutant 257
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
from mlagents.trainers.settings import EncoderType
LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = -21
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
Mutant 258
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
from mlagents.trainers.settings import EncoderType
LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = None
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
Mutant 259
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -5,7 +5,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
-EPSILON = 1e-6 # Small value to avoid divide by zero
+EPSILON = 1.000001 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
Mutant 260
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -5,7 +5,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
-EPSILON = 1e-6 # Small value to avoid divide by zero
+EPSILON = None # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
Mutant 261
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -6,7 +6,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
+DISCRETE_TARGET_ENTROPY_SCALE = 1.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 262
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -6,7 +6,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
+DISCRETE_TARGET_ENTROPY_SCALE = None # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 263
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -7,7 +7,7 @@
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
+CONTINUOUS_TARGET_ENTROPY_SCALE = 2.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 264
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -7,7 +7,7 @@
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
+CONTINUOUS_TARGET_ENTROPY_SCALE = None # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 265
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -8,7 +8,7 @@
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
-POLICY_SCOPE = ""
+POLICY_SCOPE = "XXXX"
TARGET_SCOPE = "target_network"
Mutant 267
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -9,7 +9,7 @@
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
-TARGET_SCOPE = "target_network"
+TARGET_SCOPE = "XXtarget_networkXX"
class SACNetwork:
Mutant 269
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -21,7 +21,7 @@
self,
policy=None,
m_size=None,
- h_size=128,
+ h_size=129,
normalize=False,
use_recurrent=False,
num_layers=2,
Mutant 270
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -22,7 +22,7 @@
policy=None,
m_size=None,
h_size=128,
- normalize=False,
+ normalize=True,
use_recurrent=False,
num_layers=2,
stream_names=None,
Mutant 271
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -23,7 +23,7 @@
m_size=None,
h_size=128,
normalize=False,
- use_recurrent=False,
+ use_recurrent=True,
num_layers=2,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
Mutant 272
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -24,7 +24,7 @@
h_size=128,
normalize=False,
use_recurrent=False,
- num_layers=2,
+ num_layers=3,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
Mutant 273
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -28,7 +28,7 @@
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
- self.normalize = normalize
+ self.normalize = None
self.use_recurrent = use_recurrent
self.num_layers = num_layers
self.stream_names = stream_names
Mutant 279
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -36,7 +36,7 @@
self.activ_fn = ModelUtils.swish
self.sequence_length_ph = tf.placeholder(
- shape=None, dtype=tf.int32, name="sac_sequence_length"
+ shape=None, dtype=tf.int32, name="XXsac_sequence_lengthXX"
)
self.policy_memory_in: Optional[tf.Tensor] = None
Mutant 280
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -39,7 +39,7 @@
shape=None, dtype=tf.int32, name="sac_sequence_length"
)
- self.policy_memory_in: Optional[tf.Tensor] = None
+ self.policy_memory_in: Optional[tf.Tensor] = ""
self.policy_memory_out: Optional[tf.Tensor] = None
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
Mutant 281
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -40,7 +40,7 @@
)
self.policy_memory_in: Optional[tf.Tensor] = None
- self.policy_memory_out: Optional[tf.Tensor] = None
+ self.policy_memory_out: Optional[tf.Tensor] = ""
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
Mutant 282
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -41,7 +41,7 @@
self.policy_memory_in: Optional[tf.Tensor] = None
self.policy_memory_out: Optional[tf.Tensor] = None
- self.value_memory_in: Optional[tf.Tensor] = None
+ self.value_memory_in: Optional[tf.Tensor] = ""
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
Mutant 283
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -42,7 +42,7 @@
self.policy_memory_in: Optional[tf.Tensor] = None
self.policy_memory_out: Optional[tf.Tensor] = None
self.value_memory_in: Optional[tf.Tensor] = None
- self.value_memory_out: Optional[tf.Tensor] = None
+ self.value_memory_out: Optional[tf.Tensor] = ""
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
Mutant 284
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -43,7 +43,7 @@
self.policy_memory_out: Optional[tf.Tensor] = None
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
- self.q1: Optional[tf.Tensor] = None
+ self.q1: Optional[tf.Tensor] = ""
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
Mutant 285
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -44,7 +44,7 @@
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
- self.q2: Optional[tf.Tensor] = None
+ self.q2: Optional[tf.Tensor] = ""
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
Mutant 286
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -45,7 +45,7 @@
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
- self.q1_p: Optional[tf.Tensor] = None
+ self.q1_p: Optional[tf.Tensor] = ""
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
Mutant 287
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -46,7 +46,7 @@
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
- self.q2_p: Optional[tf.Tensor] = None
+ self.q2_p: Optional[tf.Tensor] = ""
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
Mutant 288
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -47,7 +47,7 @@
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
- self.q1_memory_in: Optional[tf.Tensor] = None
+ self.q1_memory_in: Optional[tf.Tensor] = ""
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
Mutant 289
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -48,7 +48,7 @@
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
- self.q2_memory_in: Optional[tf.Tensor] = None
+ self.q2_memory_in: Optional[tf.Tensor] = ""
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
Mutant 290
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -49,7 +49,7 @@
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
- self.q1_memory_out: Optional[tf.Tensor] = None
+ self.q1_memory_out: Optional[tf.Tensor] = ""
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
Mutant 291
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -50,7 +50,7 @@
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
- self.q2_memory_out: Optional[tf.Tensor] = None
+ self.q2_memory_out: Optional[tf.Tensor] = ""
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
Mutant 292
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -51,7 +51,7 @@
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
- self.prev_action: Optional[tf.Tensor] = None
+ self.prev_action: Optional[tf.Tensor] = ""
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
Mutant 293
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -52,7 +52,7 @@
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
- self.action_masks: Optional[tf.Tensor] = None
+ self.action_masks: Optional[tf.Tensor] = ""
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
Mutant 294
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -53,7 +53,7 @@
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
- self.external_action_in: Optional[tf.Tensor] = None
+ self.external_action_in: Optional[tf.Tensor] = ""
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
Mutant 295
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -54,7 +54,7 @@
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
- self.log_sigma_sq: Optional[tf.Tensor] = None
+ self.log_sigma_sq: Optional[tf.Tensor] = ""
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
Mutant 296
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -55,7 +55,7 @@
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
- self.entropy: Optional[tf.Tensor] = None
+ self.entropy: Optional[tf.Tensor] = ""
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
Mutant 297
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -56,7 +56,7 @@
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
- self.deterministic_output: Optional[tf.Tensor] = None
+ self.deterministic_output: Optional[tf.Tensor] = ""
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
self.output_oh: Optional[tf.Tensor] = None
Mutant 298
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -57,7 +57,7 @@
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
- self.normalized_logprobs: Optional[tf.Tensor] = None
+ self.normalized_logprobs: Optional[tf.Tensor] = ""
self.action_probs: Optional[tf.Tensor] = None
self.output_oh: Optional[tf.Tensor] = None
self.output_pre: Optional[tf.Tensor] = None
Mutant 299
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -58,7 +58,7 @@
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
- self.action_probs: Optional[tf.Tensor] = None
+ self.action_probs: Optional[tf.Tensor] = ""
self.output_oh: Optional[tf.Tensor] = None
self.output_pre: Optional[tf.Tensor] = None
Mutant 300
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -59,7 +59,7 @@
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
- self.output_oh: Optional[tf.Tensor] = None
+ self.output_oh: Optional[tf.Tensor] = ""
self.output_pre: Optional[tf.Tensor] = None
self.value_vars = None
Mutant 301
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -60,7 +60,7 @@
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
self.output_oh: Optional[tf.Tensor] = None
- self.output_pre: Optional[tf.Tensor] = None
+ self.output_pre: Optional[tf.Tensor] = ""
self.value_vars = None
self.q_vars = None
Mutant 302
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -62,7 +62,7 @@
self.output_oh: Optional[tf.Tensor] = None
self.output_pre: Optional[tf.Tensor] = None
- self.value_vars = None
+ self.value_vars = ""
self.q_vars = None
self.critic_vars = None
self.policy_vars = None
Mutant 303
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -63,7 +63,7 @@
self.output_pre: Optional[tf.Tensor] = None
self.value_vars = None
- self.q_vars = None
+ self.q_vars = ""
self.critic_vars = None
self.policy_vars = None
Mutant 304
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -64,7 +64,7 @@
self.value_vars = None
self.q_vars = None
- self.critic_vars = None
+ self.critic_vars = ""
self.policy_vars = None
self.q1_heads: Dict[str, tf.Tensor] = None
Mutant 305
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -65,7 +65,7 @@
self.value_vars = None
self.q_vars = None
self.critic_vars = None
- self.policy_vars = None
+ self.policy_vars = ""
self.q1_heads: Dict[str, tf.Tensor] = None
self.q2_heads: Dict[str, tf.Tensor] = None
Mutant 306
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -67,7 +67,7 @@
self.critic_vars = None
self.policy_vars = None
- self.q1_heads: Dict[str, tf.Tensor] = None
+ self.q1_heads: Dict[str, tf.Tensor] = ""
self.q2_heads: Dict[str, tf.Tensor] = None
self.q1_pheads: Dict[str, tf.Tensor] = None
self.q2_pheads: Dict[str, tf.Tensor] = None
Mutant 307
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -68,7 +68,7 @@
self.policy_vars = None
self.q1_heads: Dict[str, tf.Tensor] = None
- self.q2_heads: Dict[str, tf.Tensor] = None
+ self.q2_heads: Dict[str, tf.Tensor] = ""
self.q1_pheads: Dict[str, tf.Tensor] = None
self.q2_pheads: Dict[str, tf.Tensor] = None
Mutant 308
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -69,7 +69,7 @@
self.q1_heads: Dict[str, tf.Tensor] = None
self.q2_heads: Dict[str, tf.Tensor] = None
- self.q1_pheads: Dict[str, tf.Tensor] = None
+ self.q1_pheads: Dict[str, tf.Tensor] = ""
self.q2_pheads: Dict[str, tf.Tensor] = None
self.policy = policy
Mutant 309
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -70,7 +70,7 @@
self.q1_heads: Dict[str, tf.Tensor] = None
self.q2_heads: Dict[str, tf.Tensor] = None
self.q1_pheads: Dict[str, tf.Tensor] = None
- self.q2_pheads: Dict[str, tf.Tensor] = None
+ self.q2_pheads: Dict[str, tf.Tensor] = ""
self.policy = policy
Mutant 312
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -84,7 +84,7 @@
"""
if not scope_1:
return scope_2
- if not scope_2:
+ if scope_2:
return scope_1
else:
return "/".join(filter(None, [scope_1, scope_2]))
Mutant 313
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -87,7 +87,7 @@
if not scope_2:
return scope_1
else:
- return "/".join(filter(None, [scope_1, scope_2]))
+ return "XX/XX".join(filter(None, [scope_1, scope_2]))
def create_value_heads(self, stream_names, hidden_input):
"""
Mutant 318
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -102,7 +102,7 @@
for name in stream_names:
value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
self.value_heads[name] = value
- self.value = tf.reduce_mean(list(self.value_heads.values()), 0)
+ self.value = tf.reduce_mean(list(self.value_heads.values()), 1)
def _create_cc_critic(self, hidden_value, scope, create_qs=True):
"""
Mutant 321
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -108,7 +108,7 @@
"""
Creates just the critic network
"""
- scope = self.join_scopes(scope, "critic")
+ scope = self.join_scopes(scope, "XXcriticXX")
self.create_sac_value_head(
self.stream_names,
hidden_value,
Mutant 325
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -119,7 +119,7 @@
self.external_action_in = tf.placeholder(
shape=[None, self.policy.act_size[0]],
dtype=tf.float32,
- name="external_action_in",
+ name="XXexternal_action_inXX",
)
self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
if create_qs:
Mutant 328
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -123,7 +123,7 @@
)
self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
if create_qs:
- hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
+ hidden_q = tf.concat([hidden_value, self.external_action_in], axis=+1)
hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
self.stream_names,
Mutant 331
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -124,7 +124,7 @@
self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
if create_qs:
hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
- hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
+ hidden_qp = tf.concat([hidden_value, self.policy.output], axis=+1)
self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
self.stream_names,
hidden_q,
Mutant 337
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -140,7 +140,7 @@
self.join_scopes(scope, "q"),
reuse=True,
)
- self.q_vars = self.get_vars(self.join_scopes(scope, "q"))
+ self.q_vars = self.get_vars(self.join_scopes(scope, "XXqXX"))
self.critic_vars = self.get_vars(scope)
def _create_dc_critic(self, hidden_value, scope, create_qs=True):
Mutant 343
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -153,7 +153,7 @@
hidden_value,
self.num_layers,
self.h_size,
- self.join_scopes(scope, "value"),
+ self.join_scopes(scope, "XXvalueXX"),
)
self.value_vars = self.get_vars("/".join([scope, "value"]))
Mutant 344
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -156,7 +156,7 @@
self.join_scopes(scope, "value"),
)
- self.value_vars = self.get_vars("/".join([scope, "value"]))
+ self.value_vars = self.get_vars("XX/XX".join([scope, "value"]))
if create_qs:
self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
Mutant 345
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -156,7 +156,7 @@
self.join_scopes(scope, "value"),
)
- self.value_vars = self.get_vars("/".join([scope, "value"]))
+ self.value_vars = self.get_vars("/".join([scope, "XXvalueXX"]))
if create_qs:
self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
Mutant 352
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -195,7 +195,7 @@
"""
with tf.variable_scope(scope):
value_hidden = ModelUtils.create_vector_observation_encoder(
- hidden_input, h_size, self.activ_fn, num_layers, "encoder", False
+ hidden_input, h_size, self.activ_fn, num_layers, "XXencoderXX", False
)
if self.use_recurrent:
value_hidden, memory_out = ModelUtils.create_recurrent_encoder(
Mutant 354
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -202,7 +202,7 @@
value_hidden,
self.value_memory_in,
self.sequence_length_ph,
- name="lstm_value",
+ name="XXlstm_valueXX",
)
self.value_memory_out = memory_out
self.create_value_heads(stream_names, value_hidden)
Mutant 356
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -230,7 +230,7 @@
:param reuse: Whether or not to reuse variables. Useful for creating Q of policy.
:param num_outputs: Number of outputs of each Q function. If discrete, equal to number of actions.
"""
- with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
+ with tf.variable_scope(self.join_scopes(scope, "XXq1_encodingXX"), reuse=reuse):
q1_hidden = ModelUtils.create_vector_observation_encoder(
hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
)
Mutant 357
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -232,7 +232,7 @@
"""
with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
q1_hidden = ModelUtils.create_vector_observation_encoder(
- hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
+ hidden_input, h_size, self.activ_fn, num_layers, "XXq1_encoderXX", reuse
)
if self.use_recurrent:
q1_hidden, memory_out = ModelUtils.create_recurrent_encoder(
Mutant 358
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -239,7 +239,7 @@
q1_hidden,
self.q1_memory_in,
self.sequence_length_ph,
- name="lstm_q1",
+ name="XXlstm_q1XX",
)
self.q1_memory_out = memory_out
Mutant 365
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -249,7 +249,7 @@
q1_heads[name] = _q1
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
- with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
+ with tf.variable_scope(self.join_scopes(scope, "XXq2_encodingXX"), reuse=reuse):
q2_hidden = ModelUtils.create_vector_observation_encoder(
hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
)
Mutant 366
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -251,7 +251,7 @@
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
q2_hidden = ModelUtils.create_vector_observation_encoder(
- hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
+ hidden_input, h_size, self.activ_fn, num_layers, "XXq2_encoderXX", reuse
)
if self.use_recurrent:
q2_hidden, memory_out = ModelUtils.create_recurrent_encoder(
Mutant 367
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -258,7 +258,7 @@
q2_hidden,
self.q2_memory_in,
self.sequence_length_ph,
- name="lstm_q2",
+ name="XXlstm_q2XX",
)
self.q2_memory_out = memory_out
Mutant 372
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -267,7 +267,7 @@
_q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
q2_heads[name] = _q2
- q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
+ q2 = tf.reduce_mean(list(q2_heads.values()), axis=1)
return q1_heads, q2_heads, q1, q2
Mutant 373
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -267,7 +267,7 @@
_q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
q2_heads[name] = _q2
- q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
+ q2 = None
return q1_heads, q2_heads, q1, q2
Mutant 374
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -282,7 +282,7 @@
self,
policy,
m_size=None,
- h_size=128,
+ h_size=129,
normalize=False,
use_recurrent=False,
num_layers=2,
Mutant 375
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -283,7 +283,7 @@
policy,
m_size=None,
h_size=128,
- normalize=False,
+ normalize=True,
use_recurrent=False,
num_layers=2,
stream_names=None,
Mutant 376
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -284,7 +284,7 @@
m_size=None,
h_size=128,
normalize=False,
- use_recurrent=False,
+ use_recurrent=True,
num_layers=2,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
Mutant 377
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -285,7 +285,7 @@
h_size=128,
normalize=False,
use_recurrent=False,
- num_layers=2,
+ num_layers=3,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
Mutant 379
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -317,7 +317,7 @@
)
else:
self.processed_vector_in = self.vector_in
- self.update_normalization_op = None
+ self.update_normalization_op = ""
if self.policy.use_recurrent:
self.memory_in = tf.placeholder(
Mutant 380
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -321,7 +321,7 @@
if self.policy.use_recurrent:
self.memory_in = tf.placeholder(
- shape=[None, m_size], dtype=tf.float32, name="target_recurrent_in"
+ shape=[None, m_size], dtype=tf.float32, name="XXtarget_recurrent_inXX"
)
self.value_memory_in = self.memory_in
hidden_streams = ModelUtils.create_observation_streams(
Mutant 389
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -339,7 +339,7 @@
self._create_dc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False)
if self.use_recurrent:
self.memory_out = tf.concat(
- self.value_memory_out, axis=1
+ self.value_memory_out, axis=2
) # Needed for Barracuda to work
def copy_normalization(self, mean, variance, steps):
Mutant 390
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -367,7 +367,7 @@
self,
policy,
m_size=None,
- h_size=128,
+ h_size=129,
normalize=False,
use_recurrent=False,
num_layers=2,
Mutant 391
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -368,7 +368,7 @@
policy,
m_size=None,
h_size=128,
- normalize=False,
+ normalize=True,
use_recurrent=False,
num_layers=2,
stream_names=None,
Mutant 392
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -369,7 +369,7 @@
m_size=None,
h_size=128,
normalize=False,
- use_recurrent=False,
+ use_recurrent=True,
num_layers=2,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
Mutant 393
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -370,7 +370,7 @@
h_size=128,
normalize=False,
use_recurrent=False,
- num_layers=2,
+ num_layers=3,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
Mutant 401
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -407,7 +407,7 @@
:param m_size: the total size of the memory.
"""
self.memory_in = tf.placeholder(
- shape=[None, m_size * 3], dtype=tf.float32, name="value_recurrent_in"
+ shape=[None, m_size * 3], dtype=tf.float32, name="XXvalue_recurrent_inXX"
)
# Re-break-up for each network
Mutant 415
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -418,7 +418,7 @@
_start = input_size // num_mems * i
_end = input_size // num_mems * (i + 1)
mem_ins.append(self.memory_in[:, _start:_end])
- self.value_memory_in = mem_ins[0]
+ self.value_memory_in = mem_ins[1]
self.q1_memory_in = mem_ins[1]
self.q2_memory_in = mem_ins[2]
Mutant 417
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -419,7 +419,7 @@
_end = input_size // num_mems * (i + 1)
mem_ins.append(self.memory_in[:, _start:_end])
self.value_memory_in = mem_ins[0]
- self.q1_memory_in = mem_ins[1]
+ self.q1_memory_in = mem_ins[2]
self.q2_memory_in = mem_ins[2]
def _create_observation_in(self, vis_encode_type):