ml-agents/mlagents/trainers/sac/network.py

Killed 91 out of 172 mutants

Survived

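The mutants listed below survived mutation testing: the test suite still passed with each of these changes applied, so each one marks behavior that the tests do not currently check.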

Mutant 333

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -3,7 +3,7 @@
 from mlagents.trainers.tf.models import ModelUtils
 from mlagents.trainers.settings import EncoderType
 
-LOG_STD_MAX = 2
+LOG_STD_MAX = 3
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05

Mutant 334

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -3,7 +3,7 @@
 from mlagents.trainers.tf.models import ModelUtils
 from mlagents.trainers.settings import EncoderType
 
-LOG_STD_MAX = 2
+LOG_STD_MAX = None
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05

Mutant 335

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
 from mlagents.trainers.settings import EncoderType
 
 LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = +20
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.

Mutant 336

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
 from mlagents.trainers.settings import EncoderType
 
 LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = -21
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.

Mutant 337

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
 from mlagents.trainers.settings import EncoderType
 
 LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = None
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.

Mutant 338

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -5,7 +5,7 @@
 
 LOG_STD_MAX = 2
 LOG_STD_MIN = -20
-EPSILON = 1e-6  # Small value to avoid divide by zero
+EPSILON = 1.000001  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""

Mutant 339

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -5,7 +5,7 @@
 
 LOG_STD_MAX = 2
 LOG_STD_MIN = -20
-EPSILON = 1e-6  # Small value to avoid divide by zero
+EPSILON = None  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""

Mutant 340

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -6,7 +6,7 @@
 LOG_STD_MAX = 2
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
+DISCRETE_TARGET_ENTROPY_SCALE = 1.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""
 TARGET_SCOPE = "target_network"

Mutant 341

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -6,7 +6,7 @@
 LOG_STD_MAX = 2
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
+DISCRETE_TARGET_ENTROPY_SCALE = None  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""
 TARGET_SCOPE = "target_network"

Mutant 342

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -7,7 +7,7 @@
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
+CONTINUOUS_TARGET_ENTROPY_SCALE = 2.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""
 TARGET_SCOPE = "target_network"
 

Mutant 343

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -7,7 +7,7 @@
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
+CONTINUOUS_TARGET_ENTROPY_SCALE = None  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""
 TARGET_SCOPE = "target_network"
 

Mutant 344

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -8,7 +8,7 @@
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
-POLICY_SCOPE = ""
+POLICY_SCOPE = "XXXX"
 TARGET_SCOPE = "target_network"
 
 

Mutant 346

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -9,7 +9,7 @@
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""
-TARGET_SCOPE = "target_network"
+TARGET_SCOPE = "XXtarget_networkXX"
 
 
 class SACNetwork:

Mutant 348

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -21,7 +21,7 @@
         self,
         policy=None,
         m_size=None,
-        h_size=128,
+        h_size=129,
         normalize=False,
         use_recurrent=False,
         num_layers=2,

Mutant 349

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -22,7 +22,7 @@
         policy=None,
         m_size=None,
         h_size=128,
-        normalize=False,
+        normalize=True,
         use_recurrent=False,
         num_layers=2,
         stream_names=None,

Mutant 350

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -23,7 +23,7 @@
         m_size=None,
         h_size=128,
         normalize=False,
-        use_recurrent=False,
+        use_recurrent=True,
         num_layers=2,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,

Mutant 351

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -24,7 +24,7 @@
         h_size=128,
         normalize=False,
         use_recurrent=False,
-        num_layers=2,
+        num_layers=3,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,
     ):

Mutant 352

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -28,7 +28,7 @@
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,
     ):
-        self.normalize = normalize
+        self.normalize = None
         self.use_recurrent = use_recurrent
         self.num_layers = num_layers
         self.stream_names = stream_names

Mutant 358

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -36,7 +36,7 @@
         self.activ_fn = ModelUtils.swish
 
         self.sequence_length_ph = tf.placeholder(
-            shape=None, dtype=tf.int32, name="sac_sequence_length"
+            shape=None, dtype=tf.int32, name="XXsac_sequence_lengthXX"
         )
 
         self.policy_memory_in: Optional[tf.Tensor] = None

Mutant 359

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -39,7 +39,7 @@
             shape=None, dtype=tf.int32, name="sac_sequence_length"
         )
 
-        self.policy_memory_in: Optional[tf.Tensor] = None
+        self.policy_memory_in: Optional[tf.Tensor] = ""
         self.policy_memory_out: Optional[tf.Tensor] = None
         self.value_memory_in: Optional[tf.Tensor] = None
         self.value_memory_out: Optional[tf.Tensor] = None

Mutant 360

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -40,7 +40,7 @@
         )
 
         self.policy_memory_in: Optional[tf.Tensor] = None
-        self.policy_memory_out: Optional[tf.Tensor] = None
+        self.policy_memory_out: Optional[tf.Tensor] = ""
         self.value_memory_in: Optional[tf.Tensor] = None
         self.value_memory_out: Optional[tf.Tensor] = None
         self.q1: Optional[tf.Tensor] = None

Mutant 361

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -41,7 +41,7 @@
 
         self.policy_memory_in: Optional[tf.Tensor] = None
         self.policy_memory_out: Optional[tf.Tensor] = None
-        self.value_memory_in: Optional[tf.Tensor] = None
+        self.value_memory_in: Optional[tf.Tensor] = ""
         self.value_memory_out: Optional[tf.Tensor] = None
         self.q1: Optional[tf.Tensor] = None
         self.q2: Optional[tf.Tensor] = None

Mutant 362

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -42,7 +42,7 @@
         self.policy_memory_in: Optional[tf.Tensor] = None
         self.policy_memory_out: Optional[tf.Tensor] = None
         self.value_memory_in: Optional[tf.Tensor] = None
-        self.value_memory_out: Optional[tf.Tensor] = None
+        self.value_memory_out: Optional[tf.Tensor] = ""
         self.q1: Optional[tf.Tensor] = None
         self.q2: Optional[tf.Tensor] = None
         self.q1_p: Optional[tf.Tensor] = None

Mutant 363

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -43,7 +43,7 @@
         self.policy_memory_out: Optional[tf.Tensor] = None
         self.value_memory_in: Optional[tf.Tensor] = None
         self.value_memory_out: Optional[tf.Tensor] = None
-        self.q1: Optional[tf.Tensor] = None
+        self.q1: Optional[tf.Tensor] = ""
         self.q2: Optional[tf.Tensor] = None
         self.q1_p: Optional[tf.Tensor] = None
         self.q2_p: Optional[tf.Tensor] = None

Mutant 364

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -44,7 +44,7 @@
         self.value_memory_in: Optional[tf.Tensor] = None
         self.value_memory_out: Optional[tf.Tensor] = None
         self.q1: Optional[tf.Tensor] = None
-        self.q2: Optional[tf.Tensor] = None
+        self.q2: Optional[tf.Tensor] = ""
         self.q1_p: Optional[tf.Tensor] = None
         self.q2_p: Optional[tf.Tensor] = None
         self.q1_memory_in: Optional[tf.Tensor] = None

Mutant 365

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -45,7 +45,7 @@
         self.value_memory_out: Optional[tf.Tensor] = None
         self.q1: Optional[tf.Tensor] = None
         self.q2: Optional[tf.Tensor] = None
-        self.q1_p: Optional[tf.Tensor] = None
+        self.q1_p: Optional[tf.Tensor] = ""
         self.q2_p: Optional[tf.Tensor] = None
         self.q1_memory_in: Optional[tf.Tensor] = None
         self.q2_memory_in: Optional[tf.Tensor] = None

Mutant 366

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -46,7 +46,7 @@
         self.q1: Optional[tf.Tensor] = None
         self.q2: Optional[tf.Tensor] = None
         self.q1_p: Optional[tf.Tensor] = None
-        self.q2_p: Optional[tf.Tensor] = None
+        self.q2_p: Optional[tf.Tensor] = ""
         self.q1_memory_in: Optional[tf.Tensor] = None
         self.q2_memory_in: Optional[tf.Tensor] = None
         self.q1_memory_out: Optional[tf.Tensor] = None

Mutant 367

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -47,7 +47,7 @@
         self.q2: Optional[tf.Tensor] = None
         self.q1_p: Optional[tf.Tensor] = None
         self.q2_p: Optional[tf.Tensor] = None
-        self.q1_memory_in: Optional[tf.Tensor] = None
+        self.q1_memory_in: Optional[tf.Tensor] = ""
         self.q2_memory_in: Optional[tf.Tensor] = None
         self.q1_memory_out: Optional[tf.Tensor] = None
         self.q2_memory_out: Optional[tf.Tensor] = None

Mutant 368

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -48,7 +48,7 @@
         self.q1_p: Optional[tf.Tensor] = None
         self.q2_p: Optional[tf.Tensor] = None
         self.q1_memory_in: Optional[tf.Tensor] = None
-        self.q2_memory_in: Optional[tf.Tensor] = None
+        self.q2_memory_in: Optional[tf.Tensor] = ""
         self.q1_memory_out: Optional[tf.Tensor] = None
         self.q2_memory_out: Optional[tf.Tensor] = None
         self.prev_action: Optional[tf.Tensor] = None

Mutant 369

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -49,7 +49,7 @@
         self.q2_p: Optional[tf.Tensor] = None
         self.q1_memory_in: Optional[tf.Tensor] = None
         self.q2_memory_in: Optional[tf.Tensor] = None
-        self.q1_memory_out: Optional[tf.Tensor] = None
+        self.q1_memory_out: Optional[tf.Tensor] = ""
         self.q2_memory_out: Optional[tf.Tensor] = None
         self.prev_action: Optional[tf.Tensor] = None
         self.action_masks: Optional[tf.Tensor] = None

Mutant 370

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -50,7 +50,7 @@
         self.q1_memory_in: Optional[tf.Tensor] = None
         self.q2_memory_in: Optional[tf.Tensor] = None
         self.q1_memory_out: Optional[tf.Tensor] = None
-        self.q2_memory_out: Optional[tf.Tensor] = None
+        self.q2_memory_out: Optional[tf.Tensor] = ""
         self.prev_action: Optional[tf.Tensor] = None
         self.action_masks: Optional[tf.Tensor] = None
         self.external_action_in: Optional[tf.Tensor] = None

Mutant 371

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -51,7 +51,7 @@
         self.q2_memory_in: Optional[tf.Tensor] = None
         self.q1_memory_out: Optional[tf.Tensor] = None
         self.q2_memory_out: Optional[tf.Tensor] = None
-        self.prev_action: Optional[tf.Tensor] = None
+        self.prev_action: Optional[tf.Tensor] = ""
         self.action_masks: Optional[tf.Tensor] = None
         self.external_action_in: Optional[tf.Tensor] = None
         self.log_sigma_sq: Optional[tf.Tensor] = None

Mutant 372

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -52,7 +52,7 @@
         self.q1_memory_out: Optional[tf.Tensor] = None
         self.q2_memory_out: Optional[tf.Tensor] = None
         self.prev_action: Optional[tf.Tensor] = None
-        self.action_masks: Optional[tf.Tensor] = None
+        self.action_masks: Optional[tf.Tensor] = ""
         self.external_action_in: Optional[tf.Tensor] = None
         self.log_sigma_sq: Optional[tf.Tensor] = None
         self.entropy: Optional[tf.Tensor] = None

Mutant 373

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -53,7 +53,7 @@
         self.q2_memory_out: Optional[tf.Tensor] = None
         self.prev_action: Optional[tf.Tensor] = None
         self.action_masks: Optional[tf.Tensor] = None
-        self.external_action_in: Optional[tf.Tensor] = None
+        self.external_action_in: Optional[tf.Tensor] = ""
         self.log_sigma_sq: Optional[tf.Tensor] = None
         self.entropy: Optional[tf.Tensor] = None
         self.deterministic_output: Optional[tf.Tensor] = None

Mutant 374

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -54,7 +54,7 @@
         self.prev_action: Optional[tf.Tensor] = None
         self.action_masks: Optional[tf.Tensor] = None
         self.external_action_in: Optional[tf.Tensor] = None
-        self.log_sigma_sq: Optional[tf.Tensor] = None
+        self.log_sigma_sq: Optional[tf.Tensor] = ""
         self.entropy: Optional[tf.Tensor] = None
         self.deterministic_output: Optional[tf.Tensor] = None
         self.normalized_logprobs: Optional[tf.Tensor] = None

Mutant 375

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -55,7 +55,7 @@
         self.action_masks: Optional[tf.Tensor] = None
         self.external_action_in: Optional[tf.Tensor] = None
         self.log_sigma_sq: Optional[tf.Tensor] = None
-        self.entropy: Optional[tf.Tensor] = None
+        self.entropy: Optional[tf.Tensor] = ""
         self.deterministic_output: Optional[tf.Tensor] = None
         self.normalized_logprobs: Optional[tf.Tensor] = None
         self.action_probs: Optional[tf.Tensor] = None

Mutant 376

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -56,7 +56,7 @@
         self.external_action_in: Optional[tf.Tensor] = None
         self.log_sigma_sq: Optional[tf.Tensor] = None
         self.entropy: Optional[tf.Tensor] = None
-        self.deterministic_output: Optional[tf.Tensor] = None
+        self.deterministic_output: Optional[tf.Tensor] = ""
         self.normalized_logprobs: Optional[tf.Tensor] = None
         self.action_probs: Optional[tf.Tensor] = None
         self.output_oh: Optional[tf.Tensor] = None

Mutant 377

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -57,7 +57,7 @@
         self.log_sigma_sq: Optional[tf.Tensor] = None
         self.entropy: Optional[tf.Tensor] = None
         self.deterministic_output: Optional[tf.Tensor] = None
-        self.normalized_logprobs: Optional[tf.Tensor] = None
+        self.normalized_logprobs: Optional[tf.Tensor] = ""
         self.action_probs: Optional[tf.Tensor] = None
         self.output_oh: Optional[tf.Tensor] = None
         self.output_pre: Optional[tf.Tensor] = None

Mutant 378

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -58,7 +58,7 @@
         self.entropy: Optional[tf.Tensor] = None
         self.deterministic_output: Optional[tf.Tensor] = None
         self.normalized_logprobs: Optional[tf.Tensor] = None
-        self.action_probs: Optional[tf.Tensor] = None
+        self.action_probs: Optional[tf.Tensor] = ""
         self.output_oh: Optional[tf.Tensor] = None
         self.output_pre: Optional[tf.Tensor] = None
 

Mutant 379

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -59,7 +59,7 @@
         self.deterministic_output: Optional[tf.Tensor] = None
         self.normalized_logprobs: Optional[tf.Tensor] = None
         self.action_probs: Optional[tf.Tensor] = None
-        self.output_oh: Optional[tf.Tensor] = None
+        self.output_oh: Optional[tf.Tensor] = ""
         self.output_pre: Optional[tf.Tensor] = None
 
         self.value_vars = None

Mutant 380

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -60,7 +60,7 @@
         self.normalized_logprobs: Optional[tf.Tensor] = None
         self.action_probs: Optional[tf.Tensor] = None
         self.output_oh: Optional[tf.Tensor] = None
-        self.output_pre: Optional[tf.Tensor] = None
+        self.output_pre: Optional[tf.Tensor] = ""
 
         self.value_vars = None
         self.q_vars = None

Mutant 381

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -62,7 +62,7 @@
         self.output_oh: Optional[tf.Tensor] = None
         self.output_pre: Optional[tf.Tensor] = None
 
-        self.value_vars = None
+        self.value_vars = ""
         self.q_vars = None
         self.critic_vars = None
         self.policy_vars = None

Mutant 382

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -63,7 +63,7 @@
         self.output_pre: Optional[tf.Tensor] = None
 
         self.value_vars = None
-        self.q_vars = None
+        self.q_vars = ""
         self.critic_vars = None
         self.policy_vars = None
 

Mutant 383

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -64,7 +64,7 @@
 
         self.value_vars = None
         self.q_vars = None
-        self.critic_vars = None
+        self.critic_vars = ""
         self.policy_vars = None
 
         self.q1_heads: Dict[str, tf.Tensor] = None

Mutant 384

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -65,7 +65,7 @@
         self.value_vars = None
         self.q_vars = None
         self.critic_vars = None
-        self.policy_vars = None
+        self.policy_vars = ""
 
         self.q1_heads: Dict[str, tf.Tensor] = None
         self.q2_heads: Dict[str, tf.Tensor] = None

Mutant 385

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -67,7 +67,7 @@
         self.critic_vars = None
         self.policy_vars = None
 
-        self.q1_heads: Dict[str, tf.Tensor] = None
+        self.q1_heads: Dict[str, tf.Tensor] = ""
         self.q2_heads: Dict[str, tf.Tensor] = None
         self.q1_pheads: Dict[str, tf.Tensor] = None
         self.q2_pheads: Dict[str, tf.Tensor] = None

Mutant 386

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -68,7 +68,7 @@
         self.policy_vars = None
 
         self.q1_heads: Dict[str, tf.Tensor] = None
-        self.q2_heads: Dict[str, tf.Tensor] = None
+        self.q2_heads: Dict[str, tf.Tensor] = ""
         self.q1_pheads: Dict[str, tf.Tensor] = None
         self.q2_pheads: Dict[str, tf.Tensor] = None
 

Mutant 387

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -69,7 +69,7 @@
 
         self.q1_heads: Dict[str, tf.Tensor] = None
         self.q2_heads: Dict[str, tf.Tensor] = None
-        self.q1_pheads: Dict[str, tf.Tensor] = None
+        self.q1_pheads: Dict[str, tf.Tensor] = ""
         self.q2_pheads: Dict[str, tf.Tensor] = None
 
         self.policy = policy

Mutant 388

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -70,7 +70,7 @@
         self.q1_heads: Dict[str, tf.Tensor] = None
         self.q2_heads: Dict[str, tf.Tensor] = None
         self.q1_pheads: Dict[str, tf.Tensor] = None
-        self.q2_pheads: Dict[str, tf.Tensor] = None
+        self.q2_pheads: Dict[str, tf.Tensor] = ""
 
         self.policy = policy
 

Mutant 397

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -102,7 +102,7 @@
         for name in stream_names:
             value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
             self.value_heads[name] = value
-        self.value = tf.reduce_mean(list(self.value_heads.values()), 0)
+        self.value = tf.reduce_mean(list(self.value_heads.values()), 1)
 
     def _create_cc_critic(self, hidden_value, scope, create_qs=True):
         """

Mutant 400

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -108,7 +108,7 @@
         """
         Creates just the critic network
         """
-        scope = self.join_scopes(scope, "critic")
+        scope = self.join_scopes(scope, "XXcriticXX")
         self.create_sac_value_head(
             self.stream_names,
             hidden_value,

Mutant 402

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -114,7 +114,7 @@
             hidden_value,
             self.num_layers,
             self.h_size,
-            self.join_scopes(scope, "value"),
+            self.join_scopes(scope, "XXvalueXX"),
         )
         self.external_action_in = tf.placeholder(
             shape=[None, self.policy.act_size[0]],

Mutant 404

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -119,7 +119,7 @@
         self.external_action_in = tf.placeholder(
             shape=[None, self.policy.act_size[0]],
             dtype=tf.float32,
-            name="external_action_in",
+            name="XXexternal_action_inXX",
         )
         self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
         if create_qs:

Mutant 405

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -121,7 +121,7 @@
             dtype=tf.float32,
             name="external_action_in",
         )
-        self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
+        self.value_vars = self.get_vars(self.join_scopes(scope, "XXvalueXX"))
         if create_qs:
             hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
             hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)

Mutant 407

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -123,7 +123,7 @@
         )
         self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
         if create_qs:
-            hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
+            hidden_q = tf.concat([hidden_value, self.external_action_in], axis=+1)
             hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
             self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
                 self.stream_names,

Mutant 410

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -124,7 +124,7 @@
         self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
         if create_qs:
             hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
-            hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
+            hidden_qp = tf.concat([hidden_value, self.policy.output], axis=+1)
             self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
                 self.stream_names,
                 hidden_q,

Mutant 416

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -140,7 +140,7 @@
                 self.join_scopes(scope, "q"),
                 reuse=True,
             )
-            self.q_vars = self.get_vars(self.join_scopes(scope, "q"))
+            self.q_vars = self.get_vars(self.join_scopes(scope, "XXqXX"))
         self.critic_vars = self.get_vars(scope)
 
     def _create_dc_critic(self, hidden_value, scope, create_qs=True):

Mutant 431

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -195,7 +195,7 @@
         """
         with tf.variable_scope(scope):
             value_hidden = ModelUtils.create_vector_observation_encoder(
-                hidden_input, h_size, self.activ_fn, num_layers, "encoder", False
+                hidden_input, h_size, self.activ_fn, num_layers, "XXencoderXX", False
             )
             if self.use_recurrent:
                 value_hidden, memory_out = ModelUtils.create_recurrent_encoder(

Mutant 433

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -202,7 +202,7 @@
                     value_hidden,
                     self.value_memory_in,
                     self.sequence_length_ph,
-                    name="lstm_value",
+                    name="XXlstm_valueXX",
                 )
                 self.value_memory_out = memory_out
             self.create_value_heads(stream_names, value_hidden)

Mutant 435

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -230,7 +230,7 @@
         :param reuse: Whether or not to reuse variables. Useful for creating Q of policy.
         :param num_outputs: Number of outputs of each Q function. If discrete, equal to number of actions.
         """
-        with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
+        with tf.variable_scope(self.join_scopes(scope, "XXq1_encodingXX"), reuse=reuse):
             q1_hidden = ModelUtils.create_vector_observation_encoder(
                 hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
             )

Mutant 436

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -232,7 +232,7 @@
         """
         with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
             q1_hidden = ModelUtils.create_vector_observation_encoder(
-                hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
+                hidden_input, h_size, self.activ_fn, num_layers, "XXq1_encoderXX", reuse
             )
             if self.use_recurrent:
                 q1_hidden, memory_out = ModelUtils.create_recurrent_encoder(

Mutant 437

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -239,7 +239,7 @@
                     q1_hidden,
                     self.q1_memory_in,
                     self.sequence_length_ph,
-                    name="lstm_q1",
+                    name="XXlstm_q1XX",
                 )
                 self.q1_memory_out = memory_out
 

Mutant 444

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -249,7 +249,7 @@
                 q1_heads[name] = _q1
 
             q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
-        with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
+        with tf.variable_scope(self.join_scopes(scope, "XXq2_encodingXX"), reuse=reuse):
             q2_hidden = ModelUtils.create_vector_observation_encoder(
                 hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
             )

Mutant 445

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -251,7 +251,7 @@
             q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
         with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
             q2_hidden = ModelUtils.create_vector_observation_encoder(
-                hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
+                hidden_input, h_size, self.activ_fn, num_layers, "XXq2_encoderXX", reuse
             )
             if self.use_recurrent:
                 q2_hidden, memory_out = ModelUtils.create_recurrent_encoder(

Mutant 446

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -258,7 +258,7 @@
                     q2_hidden,
                     self.q2_memory_in,
                     self.sequence_length_ph,
-                    name="lstm_q2",
+                    name="XXlstm_q2XX",
                 )
                 self.q2_memory_out = memory_out
 

Mutant 451

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -267,7 +267,7 @@
                 _q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
                 q2_heads[name] = _q2
 
-            q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
+            q2 = tf.reduce_mean(list(q2_heads.values()), axis=1)
 
         return q1_heads, q2_heads, q1, q2
 

Mutant 452

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -267,7 +267,7 @@
                 _q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
                 q2_heads[name] = _q2
 
-            q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
+            q2 = None
 
         return q1_heads, q2_heads, q1, q2
 

Mutant 453

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -282,7 +282,7 @@
         self,
         policy,
         m_size=None,
-        h_size=128,
+        h_size=129,
         normalize=False,
         use_recurrent=False,
         num_layers=2,

Mutant 454

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -283,7 +283,7 @@
         policy,
         m_size=None,
         h_size=128,
-        normalize=False,
+        normalize=True,
         use_recurrent=False,
         num_layers=2,
         stream_names=None,

Mutant 455

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -284,7 +284,7 @@
         m_size=None,
         h_size=128,
         normalize=False,
-        use_recurrent=False,
+        use_recurrent=True,
         num_layers=2,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,

Mutant 456

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -285,7 +285,7 @@
         h_size=128,
         normalize=False,
         use_recurrent=False,
-        num_layers=2,
+        num_layers=3,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,
     ):

Mutant 458

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -317,7 +317,7 @@
                 )
             else:
                 self.processed_vector_in = self.vector_in
-                self.update_normalization_op = None
+                self.update_normalization_op = ""
 
             if self.policy.use_recurrent:
                 self.memory_in = tf.placeholder(

Mutant 459

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -321,7 +321,7 @@
 
             if self.policy.use_recurrent:
                 self.memory_in = tf.placeholder(
-                    shape=[None, m_size], dtype=tf.float32, name="target_recurrent_in"
+                    shape=[None, m_size], dtype=tf.float32, name="XXtarget_recurrent_inXX"
                 )
                 self.value_memory_in = self.memory_in
             hidden_streams = ModelUtils.create_observation_streams(

Mutant 468

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -339,7 +339,7 @@
             self._create_dc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False)
         if self.use_recurrent:
             self.memory_out = tf.concat(
-                self.value_memory_out, axis=1
+                self.value_memory_out, axis=2
             )  # Needed for Barracuda to work
 
     def copy_normalization(self, mean, variance, steps):

Mutant 469

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -367,7 +367,7 @@
         self,
         policy,
         m_size=None,
-        h_size=128,
+        h_size=129,
         normalize=False,
         use_recurrent=False,
         num_layers=2,

Mutant 470

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -368,7 +368,7 @@
         policy,
         m_size=None,
         h_size=128,
-        normalize=False,
+        normalize=True,
         use_recurrent=False,
         num_layers=2,
         stream_names=None,

Mutant 471

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -369,7 +369,7 @@
         m_size=None,
         h_size=128,
         normalize=False,
-        use_recurrent=False,
+        use_recurrent=True,
         num_layers=2,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,

Mutant 472

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -370,7 +370,7 @@
         h_size=128,
         normalize=False,
         use_recurrent=False,
-        num_layers=2,
+        num_layers=3,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,
     ):

Mutant 480

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -407,7 +407,7 @@
         :param m_size: the total size of the memory.
         """
         self.memory_in = tf.placeholder(
-            shape=[None, m_size * 3], dtype=tf.float32, name="value_recurrent_in"
+            shape=[None, m_size * 3], dtype=tf.float32, name="XXvalue_recurrent_inXX"
         )
 
         # Re-break-up for each network

Mutant 494

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -418,7 +418,7 @@
             _start = input_size // num_mems * i
             _end = input_size // num_mems * (i + 1)
             mem_ins.append(self.memory_in[:, _start:_end])
-        self.value_memory_in = mem_ins[0]
+        self.value_memory_in = mem_ins[1]
         self.q1_memory_in = mem_ins[1]
         self.q2_memory_in = mem_ins[2]
 

Mutant 496

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -419,7 +419,7 @@
             _end = input_size // num_mems * (i + 1)
             mem_ins.append(self.memory_in[:, _start:_end])
         self.value_memory_in = mem_ins[0]
-        self.q1_memory_in = mem_ins[1]
+        self.q1_memory_in = mem_ins[2]
         self.q2_memory_in = mem_ins[2]
 
     def _create_observation_in(self, vis_encode_type):