ml-agents/mlagents/trainers/sac/network.py
Killed 91 out of 172 mutants
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 333
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -3,7 +3,7 @@
from mlagents.trainers.tf.models import ModelUtils
from mlagents.trainers.settings import EncoderType
-LOG_STD_MAX = 2
+LOG_STD_MAX = 3
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
Mutant 334
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -3,7 +3,7 @@
from mlagents.trainers.tf.models import ModelUtils
from mlagents.trainers.settings import EncoderType
-LOG_STD_MAX = 2
+LOG_STD_MAX = None
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
Mutant 335
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
from mlagents.trainers.settings import EncoderType
LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = +20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
Mutant 336
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
from mlagents.trainers.settings import EncoderType
LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = -21
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
Mutant 337
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
from mlagents.trainers.settings import EncoderType
LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = None
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
Mutant 338
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -5,7 +5,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
-EPSILON = 1e-6 # Small value to avoid divide by zero
+EPSILON = 1.000001 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
Mutant 339
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -5,7 +5,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
-EPSILON = 1e-6 # Small value to avoid divide by zero
+EPSILON = None # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
Mutant 340
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -6,7 +6,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
+DISCRETE_TARGET_ENTROPY_SCALE = 1.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 341
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -6,7 +6,7 @@
LOG_STD_MAX = 2
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
+DISCRETE_TARGET_ENTROPY_SCALE = None # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 342
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -7,7 +7,7 @@
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
+CONTINUOUS_TARGET_ENTROPY_SCALE = 2.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 343
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -7,7 +7,7 @@
LOG_STD_MIN = -20
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
+CONTINUOUS_TARGET_ENTROPY_SCALE = None # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"
Mutant 344
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -8,7 +8,7 @@
EPSILON = 1e-6 # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
-POLICY_SCOPE = ""
+POLICY_SCOPE = "XXXX"
TARGET_SCOPE = "target_network"
Mutant 346
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -9,7 +9,7 @@
DISCRETE_TARGET_ENTROPY_SCALE = 0.2 # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
POLICY_SCOPE = ""
-TARGET_SCOPE = "target_network"
+TARGET_SCOPE = "XXtarget_networkXX"
class SACNetwork:
Mutant 348
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -21,7 +21,7 @@
self,
policy=None,
m_size=None,
- h_size=128,
+ h_size=129,
normalize=False,
use_recurrent=False,
num_layers=2,
Mutant 349
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -22,7 +22,7 @@
policy=None,
m_size=None,
h_size=128,
- normalize=False,
+ normalize=True,
use_recurrent=False,
num_layers=2,
stream_names=None,
Mutant 350
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -23,7 +23,7 @@
m_size=None,
h_size=128,
normalize=False,
- use_recurrent=False,
+ use_recurrent=True,
num_layers=2,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
Mutant 351
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -24,7 +24,7 @@
h_size=128,
normalize=False,
use_recurrent=False,
- num_layers=2,
+ num_layers=3,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
Mutant 352
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -28,7 +28,7 @@
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
- self.normalize = normalize
+ self.normalize = None
self.use_recurrent = use_recurrent
self.num_layers = num_layers
self.stream_names = stream_names
Mutant 358
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -36,7 +36,7 @@
self.activ_fn = ModelUtils.swish
self.sequence_length_ph = tf.placeholder(
- shape=None, dtype=tf.int32, name="sac_sequence_length"
+ shape=None, dtype=tf.int32, name="XXsac_sequence_lengthXX"
)
self.policy_memory_in: Optional[tf.Tensor] = None
Mutant 359
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -39,7 +39,7 @@
shape=None, dtype=tf.int32, name="sac_sequence_length"
)
- self.policy_memory_in: Optional[tf.Tensor] = None
+ self.policy_memory_in: Optional[tf.Tensor] = ""
self.policy_memory_out: Optional[tf.Tensor] = None
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
Mutant 360
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -40,7 +40,7 @@
)
self.policy_memory_in: Optional[tf.Tensor] = None
- self.policy_memory_out: Optional[tf.Tensor] = None
+ self.policy_memory_out: Optional[tf.Tensor] = ""
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
Mutant 361
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -41,7 +41,7 @@
self.policy_memory_in: Optional[tf.Tensor] = None
self.policy_memory_out: Optional[tf.Tensor] = None
- self.value_memory_in: Optional[tf.Tensor] = None
+ self.value_memory_in: Optional[tf.Tensor] = ""
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
Mutant 362
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -42,7 +42,7 @@
self.policy_memory_in: Optional[tf.Tensor] = None
self.policy_memory_out: Optional[tf.Tensor] = None
self.value_memory_in: Optional[tf.Tensor] = None
- self.value_memory_out: Optional[tf.Tensor] = None
+ self.value_memory_out: Optional[tf.Tensor] = ""
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
Mutant 363
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -43,7 +43,7 @@
self.policy_memory_out: Optional[tf.Tensor] = None
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
- self.q1: Optional[tf.Tensor] = None
+ self.q1: Optional[tf.Tensor] = ""
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
Mutant 364
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -44,7 +44,7 @@
self.value_memory_in: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
- self.q2: Optional[tf.Tensor] = None
+ self.q2: Optional[tf.Tensor] = ""
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
Mutant 365
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -45,7 +45,7 @@
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
- self.q1_p: Optional[tf.Tensor] = None
+ self.q1_p: Optional[tf.Tensor] = ""
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
Mutant 366
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -46,7 +46,7 @@
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
- self.q2_p: Optional[tf.Tensor] = None
+ self.q2_p: Optional[tf.Tensor] = ""
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
Mutant 367
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -47,7 +47,7 @@
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
- self.q1_memory_in: Optional[tf.Tensor] = None
+ self.q1_memory_in: Optional[tf.Tensor] = ""
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
Mutant 368
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -48,7 +48,7 @@
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
- self.q2_memory_in: Optional[tf.Tensor] = None
+ self.q2_memory_in: Optional[tf.Tensor] = ""
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
Mutant 369
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -49,7 +49,7 @@
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
- self.q1_memory_out: Optional[tf.Tensor] = None
+ self.q1_memory_out: Optional[tf.Tensor] = ""
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
Mutant 370
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -50,7 +50,7 @@
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
- self.q2_memory_out: Optional[tf.Tensor] = None
+ self.q2_memory_out: Optional[tf.Tensor] = ""
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
Mutant 371
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -51,7 +51,7 @@
self.q2_memory_in: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
- self.prev_action: Optional[tf.Tensor] = None
+ self.prev_action: Optional[tf.Tensor] = ""
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
Mutant 372
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -52,7 +52,7 @@
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
- self.action_masks: Optional[tf.Tensor] = None
+ self.action_masks: Optional[tf.Tensor] = ""
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
Mutant 373
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -53,7 +53,7 @@
self.q2_memory_out: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
- self.external_action_in: Optional[tf.Tensor] = None
+ self.external_action_in: Optional[tf.Tensor] = ""
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
Mutant 374
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -54,7 +54,7 @@
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
- self.log_sigma_sq: Optional[tf.Tensor] = None
+ self.log_sigma_sq: Optional[tf.Tensor] = ""
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
Mutant 375
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -55,7 +55,7 @@
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
- self.entropy: Optional[tf.Tensor] = None
+ self.entropy: Optional[tf.Tensor] = ""
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
Mutant 376
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -56,7 +56,7 @@
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
- self.deterministic_output: Optional[tf.Tensor] = None
+ self.deterministic_output: Optional[tf.Tensor] = ""
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
self.output_oh: Optional[tf.Tensor] = None
Mutant 377
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -57,7 +57,7 @@
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
- self.normalized_logprobs: Optional[tf.Tensor] = None
+ self.normalized_logprobs: Optional[tf.Tensor] = ""
self.action_probs: Optional[tf.Tensor] = None
self.output_oh: Optional[tf.Tensor] = None
self.output_pre: Optional[tf.Tensor] = None
Mutant 378
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -58,7 +58,7 @@
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
- self.action_probs: Optional[tf.Tensor] = None
+ self.action_probs: Optional[tf.Tensor] = ""
self.output_oh: Optional[tf.Tensor] = None
self.output_pre: Optional[tf.Tensor] = None
Mutant 379
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -59,7 +59,7 @@
self.deterministic_output: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
- self.output_oh: Optional[tf.Tensor] = None
+ self.output_oh: Optional[tf.Tensor] = ""
self.output_pre: Optional[tf.Tensor] = None
self.value_vars = None
Mutant 380
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -60,7 +60,7 @@
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
self.output_oh: Optional[tf.Tensor] = None
- self.output_pre: Optional[tf.Tensor] = None
+ self.output_pre: Optional[tf.Tensor] = ""
self.value_vars = None
self.q_vars = None
Mutant 381
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -62,7 +62,7 @@
self.output_oh: Optional[tf.Tensor] = None
self.output_pre: Optional[tf.Tensor] = None
- self.value_vars = None
+ self.value_vars = ""
self.q_vars = None
self.critic_vars = None
self.policy_vars = None
Mutant 382
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -63,7 +63,7 @@
self.output_pre: Optional[tf.Tensor] = None
self.value_vars = None
- self.q_vars = None
+ self.q_vars = ""
self.critic_vars = None
self.policy_vars = None
Mutant 383
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -64,7 +64,7 @@
self.value_vars = None
self.q_vars = None
- self.critic_vars = None
+ self.critic_vars = ""
self.policy_vars = None
self.q1_heads: Dict[str, tf.Tensor] = None
Mutant 384
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -65,7 +65,7 @@
self.value_vars = None
self.q_vars = None
self.critic_vars = None
- self.policy_vars = None
+ self.policy_vars = ""
self.q1_heads: Dict[str, tf.Tensor] = None
self.q2_heads: Dict[str, tf.Tensor] = None
Mutant 385
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -67,7 +67,7 @@
self.critic_vars = None
self.policy_vars = None
- self.q1_heads: Dict[str, tf.Tensor] = None
+ self.q1_heads: Dict[str, tf.Tensor] = ""
self.q2_heads: Dict[str, tf.Tensor] = None
self.q1_pheads: Dict[str, tf.Tensor] = None
self.q2_pheads: Dict[str, tf.Tensor] = None
Mutant 386
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -68,7 +68,7 @@
self.policy_vars = None
self.q1_heads: Dict[str, tf.Tensor] = None
- self.q2_heads: Dict[str, tf.Tensor] = None
+ self.q2_heads: Dict[str, tf.Tensor] = ""
self.q1_pheads: Dict[str, tf.Tensor] = None
self.q2_pheads: Dict[str, tf.Tensor] = None
Mutant 387
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -69,7 +69,7 @@
self.q1_heads: Dict[str, tf.Tensor] = None
self.q2_heads: Dict[str, tf.Tensor] = None
- self.q1_pheads: Dict[str, tf.Tensor] = None
+ self.q1_pheads: Dict[str, tf.Tensor] = ""
self.q2_pheads: Dict[str, tf.Tensor] = None
self.policy = policy
Mutant 388
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -70,7 +70,7 @@
self.q1_heads: Dict[str, tf.Tensor] = None
self.q2_heads: Dict[str, tf.Tensor] = None
self.q1_pheads: Dict[str, tf.Tensor] = None
- self.q2_pheads: Dict[str, tf.Tensor] = None
+ self.q2_pheads: Dict[str, tf.Tensor] = ""
self.policy = policy
Mutant 397
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -102,7 +102,7 @@
for name in stream_names:
value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
self.value_heads[name] = value
- self.value = tf.reduce_mean(list(self.value_heads.values()), 0)
+ self.value = tf.reduce_mean(list(self.value_heads.values()), 1)
def _create_cc_critic(self, hidden_value, scope, create_qs=True):
"""
Mutant 400
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -108,7 +108,7 @@
"""
Creates just the critic network
"""
- scope = self.join_scopes(scope, "critic")
+ scope = self.join_scopes(scope, "XXcriticXX")
self.create_sac_value_head(
self.stream_names,
hidden_value,
Mutant 402
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -114,7 +114,7 @@
hidden_value,
self.num_layers,
self.h_size,
- self.join_scopes(scope, "value"),
+ self.join_scopes(scope, "XXvalueXX"),
)
self.external_action_in = tf.placeholder(
shape=[None, self.policy.act_size[0]],
Mutant 404
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -119,7 +119,7 @@
self.external_action_in = tf.placeholder(
shape=[None, self.policy.act_size[0]],
dtype=tf.float32,
- name="external_action_in",
+ name="XXexternal_action_inXX",
)
self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
if create_qs:
Mutant 405
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -121,7 +121,7 @@
dtype=tf.float32,
name="external_action_in",
)
- self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
+ self.value_vars = self.get_vars(self.join_scopes(scope, "XXvalueXX"))
if create_qs:
hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
Mutant 407
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -123,7 +123,7 @@
)
self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
if create_qs:
- hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
+ hidden_q = tf.concat([hidden_value, self.external_action_in], axis=+1)
hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
self.stream_names,
Mutant 410
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -124,7 +124,7 @@
self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
if create_qs:
hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
- hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
+ hidden_qp = tf.concat([hidden_value, self.policy.output], axis=+1)
self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
self.stream_names,
hidden_q,
Mutant 416
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -140,7 +140,7 @@
self.join_scopes(scope, "q"),
reuse=True,
)
- self.q_vars = self.get_vars(self.join_scopes(scope, "q"))
+ self.q_vars = self.get_vars(self.join_scopes(scope, "XXqXX"))
self.critic_vars = self.get_vars(scope)
def _create_dc_critic(self, hidden_value, scope, create_qs=True):
Mutant 431
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -195,7 +195,7 @@
"""
with tf.variable_scope(scope):
value_hidden = ModelUtils.create_vector_observation_encoder(
- hidden_input, h_size, self.activ_fn, num_layers, "encoder", False
+ hidden_input, h_size, self.activ_fn, num_layers, "XXencoderXX", False
)
if self.use_recurrent:
value_hidden, memory_out = ModelUtils.create_recurrent_encoder(
Mutant 433
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -202,7 +202,7 @@
value_hidden,
self.value_memory_in,
self.sequence_length_ph,
- name="lstm_value",
+ name="XXlstm_valueXX",
)
self.value_memory_out = memory_out
self.create_value_heads(stream_names, value_hidden)
Mutant 435
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -230,7 +230,7 @@
:param reuse: Whether or not to reuse variables. Useful for creating Q of policy.
:param num_outputs: Number of outputs of each Q function. If discrete, equal to number of actions.
"""
- with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
+ with tf.variable_scope(self.join_scopes(scope, "XXq1_encodingXX"), reuse=reuse):
q1_hidden = ModelUtils.create_vector_observation_encoder(
hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
)
Mutant 436
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -232,7 +232,7 @@
"""
with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
q1_hidden = ModelUtils.create_vector_observation_encoder(
- hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
+ hidden_input, h_size, self.activ_fn, num_layers, "XXq1_encoderXX", reuse
)
if self.use_recurrent:
q1_hidden, memory_out = ModelUtils.create_recurrent_encoder(
Mutant 437
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -239,7 +239,7 @@
q1_hidden,
self.q1_memory_in,
self.sequence_length_ph,
- name="lstm_q1",
+ name="XXlstm_q1XX",
)
self.q1_memory_out = memory_out
Mutant 444
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -249,7 +249,7 @@
q1_heads[name] = _q1
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
- with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
+ with tf.variable_scope(self.join_scopes(scope, "XXq2_encodingXX"), reuse=reuse):
q2_hidden = ModelUtils.create_vector_observation_encoder(
hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
)
Mutant 445
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -251,7 +251,7 @@
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
q2_hidden = ModelUtils.create_vector_observation_encoder(
- hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
+ hidden_input, h_size, self.activ_fn, num_layers, "XXq2_encoderXX", reuse
)
if self.use_recurrent:
q2_hidden, memory_out = ModelUtils.create_recurrent_encoder(
Mutant 446
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -258,7 +258,7 @@
q2_hidden,
self.q2_memory_in,
self.sequence_length_ph,
- name="lstm_q2",
+ name="XXlstm_q2XX",
)
self.q2_memory_out = memory_out
Mutant 451
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -267,7 +267,7 @@
_q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
q2_heads[name] = _q2
- q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
+ q2 = tf.reduce_mean(list(q2_heads.values()), axis=1)
return q1_heads, q2_heads, q1, q2
Mutant 452
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -267,7 +267,7 @@
_q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
q2_heads[name] = _q2
- q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
+ q2 = None
return q1_heads, q2_heads, q1, q2
Mutant 453
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -282,7 +282,7 @@
self,
policy,
m_size=None,
- h_size=128,
+ h_size=129,
normalize=False,
use_recurrent=False,
num_layers=2,
Mutant 454
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -283,7 +283,7 @@
policy,
m_size=None,
h_size=128,
- normalize=False,
+ normalize=True,
use_recurrent=False,
num_layers=2,
stream_names=None,
Mutant 455
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -284,7 +284,7 @@
m_size=None,
h_size=128,
normalize=False,
- use_recurrent=False,
+ use_recurrent=True,
num_layers=2,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
Mutant 456
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -285,7 +285,7 @@
h_size=128,
normalize=False,
use_recurrent=False,
- num_layers=2,
+ num_layers=3,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
Mutant 458
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -317,7 +317,7 @@
)
else:
self.processed_vector_in = self.vector_in
- self.update_normalization_op = None
+ self.update_normalization_op = ""
if self.policy.use_recurrent:
self.memory_in = tf.placeholder(
Mutant 459
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -321,7 +321,7 @@
if self.policy.use_recurrent:
self.memory_in = tf.placeholder(
- shape=[None, m_size], dtype=tf.float32, name="target_recurrent_in"
+ shape=[None, m_size], dtype=tf.float32, name="XXtarget_recurrent_inXX"
)
self.value_memory_in = self.memory_in
hidden_streams = ModelUtils.create_observation_streams(
Mutant 468
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -339,7 +339,7 @@
self._create_dc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False)
if self.use_recurrent:
self.memory_out = tf.concat(
- self.value_memory_out, axis=1
+ self.value_memory_out, axis=2
) # Needed for Barracuda to work
def copy_normalization(self, mean, variance, steps):
Mutant 469
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -367,7 +367,7 @@
self,
policy,
m_size=None,
- h_size=128,
+ h_size=129,
normalize=False,
use_recurrent=False,
num_layers=2,
Mutant 470
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -368,7 +368,7 @@
policy,
m_size=None,
h_size=128,
- normalize=False,
+ normalize=True,
use_recurrent=False,
num_layers=2,
stream_names=None,
Mutant 471
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -369,7 +369,7 @@
m_size=None,
h_size=128,
normalize=False,
- use_recurrent=False,
+ use_recurrent=True,
num_layers=2,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
Mutant 472
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -370,7 +370,7 @@
h_size=128,
normalize=False,
use_recurrent=False,
- num_layers=2,
+ num_layers=3,
stream_names=None,
vis_encode_type=EncoderType.SIMPLE,
):
Mutant 480
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -407,7 +407,7 @@
:param m_size: the total size of the memory.
"""
self.memory_in = tf.placeholder(
- shape=[None, m_size * 3], dtype=tf.float32, name="value_recurrent_in"
+ shape=[None, m_size * 3], dtype=tf.float32, name="XXvalue_recurrent_inXX"
)
# Re-break-up for each network
Mutant 494
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -418,7 +418,7 @@
_start = input_size // num_mems * i
_end = input_size // num_mems * (i + 1)
mem_ins.append(self.memory_in[:, _start:_end])
- self.value_memory_in = mem_ins[0]
+ self.value_memory_in = mem_ins[1]
self.q1_memory_in = mem_ins[1]
self.q2_memory_in = mem_ins[2]
Mutant 496
--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -419,7 +419,7 @@
_end = input_size // num_mems * (i + 1)
mem_ins.append(self.memory_in[:, _start:_end])
self.value_memory_in = mem_ins[0]
- self.q1_memory_in = mem_ins[1]
+ self.q1_memory_in = mem_ins[2]
self.q2_memory_in = mem_ins[2]
def _create_observation_in(self, vis_encode_type):