ml-agents/mlagents/trainers/sac/network.py

Killed 88 out of 172 mutants

Survived

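The mutants listed below survived mutation testing: the test suite still passed with each change applied, so each one marks behavior that the tests do not currently check.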

Mutant 254

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -3,7 +3,7 @@
 from mlagents.trainers.tf.models import ModelUtils
 from mlagents.trainers.settings import EncoderType
 
-LOG_STD_MAX = 2
+LOG_STD_MAX = 3
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05

Mutant 255

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -3,7 +3,7 @@
 from mlagents.trainers.tf.models import ModelUtils
 from mlagents.trainers.settings import EncoderType
 
-LOG_STD_MAX = 2
+LOG_STD_MAX = None
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05

Mutant 256

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
 from mlagents.trainers.settings import EncoderType
 
 LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = +20
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.

Mutant 257

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
 from mlagents.trainers.settings import EncoderType
 
 LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = -21
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.

Mutant 258

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -4,7 +4,7 @@
 from mlagents.trainers.settings import EncoderType
 
 LOG_STD_MAX = 2
-LOG_STD_MIN = -20
+LOG_STD_MIN = None
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.

Mutant 259

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -5,7 +5,7 @@
 
 LOG_STD_MAX = 2
 LOG_STD_MIN = -20
-EPSILON = 1e-6  # Small value to avoid divide by zero
+EPSILON = 1.000001  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""

Mutant 260

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -5,7 +5,7 @@
 
 LOG_STD_MAX = 2
 LOG_STD_MIN = -20
-EPSILON = 1e-6  # Small value to avoid divide by zero
+EPSILON = None  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""

Mutant 261

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -6,7 +6,7 @@
 LOG_STD_MAX = 2
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
+DISCRETE_TARGET_ENTROPY_SCALE = 1.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""
 TARGET_SCOPE = "target_network"

Mutant 262

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -6,7 +6,7 @@
 LOG_STD_MAX = 2
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
-DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
+DISCRETE_TARGET_ENTROPY_SCALE = None  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""
 TARGET_SCOPE = "target_network"

Mutant 263

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -7,7 +7,7 @@
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
+CONTINUOUS_TARGET_ENTROPY_SCALE = 2.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""
 TARGET_SCOPE = "target_network"
 

Mutant 264

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -7,7 +7,7 @@
 LOG_STD_MIN = -20
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
-CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
+CONTINUOUS_TARGET_ENTROPY_SCALE = None  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""
 TARGET_SCOPE = "target_network"
 

Mutant 265

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -8,7 +8,7 @@
 EPSILON = 1e-6  # Small value to avoid divide by zero
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
-POLICY_SCOPE = ""
+POLICY_SCOPE = "XXXX"
 TARGET_SCOPE = "target_network"
 
 

Mutant 267

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -9,7 +9,7 @@
 DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
 CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.
 POLICY_SCOPE = ""
-TARGET_SCOPE = "target_network"
+TARGET_SCOPE = "XXtarget_networkXX"
 
 
 class SACNetwork:

Mutant 269

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -21,7 +21,7 @@
         self,
         policy=None,
         m_size=None,
-        h_size=128,
+        h_size=129,
         normalize=False,
         use_recurrent=False,
         num_layers=2,

Mutant 270

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -22,7 +22,7 @@
         policy=None,
         m_size=None,
         h_size=128,
-        normalize=False,
+        normalize=True,
         use_recurrent=False,
         num_layers=2,
         stream_names=None,

Mutant 271

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -23,7 +23,7 @@
         m_size=None,
         h_size=128,
         normalize=False,
-        use_recurrent=False,
+        use_recurrent=True,
         num_layers=2,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,

Mutant 272

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -24,7 +24,7 @@
         h_size=128,
         normalize=False,
         use_recurrent=False,
-        num_layers=2,
+        num_layers=3,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,
     ):

Mutant 273

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -28,7 +28,7 @@
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,
     ):
-        self.normalize = normalize
+        self.normalize = None
         self.use_recurrent = use_recurrent
         self.num_layers = num_layers
         self.stream_names = stream_names

Mutant 279

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -36,7 +36,7 @@
         self.activ_fn = ModelUtils.swish
 
         self.sequence_length_ph = tf.placeholder(
-            shape=None, dtype=tf.int32, name="sac_sequence_length"
+            shape=None, dtype=tf.int32, name="XXsac_sequence_lengthXX"
         )
 
         self.policy_memory_in: Optional[tf.Tensor] = None

Mutant 280

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -39,7 +39,7 @@
             shape=None, dtype=tf.int32, name="sac_sequence_length"
         )
 
-        self.policy_memory_in: Optional[tf.Tensor] = None
+        self.policy_memory_in: Optional[tf.Tensor] = ""
         self.policy_memory_out: Optional[tf.Tensor] = None
         self.value_memory_in: Optional[tf.Tensor] = None
         self.value_memory_out: Optional[tf.Tensor] = None

Mutant 281

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -40,7 +40,7 @@
         )
 
         self.policy_memory_in: Optional[tf.Tensor] = None
-        self.policy_memory_out: Optional[tf.Tensor] = None
+        self.policy_memory_out: Optional[tf.Tensor] = ""
         self.value_memory_in: Optional[tf.Tensor] = None
         self.value_memory_out: Optional[tf.Tensor] = None
         self.q1: Optional[tf.Tensor] = None

Mutant 282

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -41,7 +41,7 @@
 
         self.policy_memory_in: Optional[tf.Tensor] = None
         self.policy_memory_out: Optional[tf.Tensor] = None
-        self.value_memory_in: Optional[tf.Tensor] = None
+        self.value_memory_in: Optional[tf.Tensor] = ""
         self.value_memory_out: Optional[tf.Tensor] = None
         self.q1: Optional[tf.Tensor] = None
         self.q2: Optional[tf.Tensor] = None

Mutant 283

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -42,7 +42,7 @@
         self.policy_memory_in: Optional[tf.Tensor] = None
         self.policy_memory_out: Optional[tf.Tensor] = None
         self.value_memory_in: Optional[tf.Tensor] = None
-        self.value_memory_out: Optional[tf.Tensor] = None
+        self.value_memory_out: Optional[tf.Tensor] = ""
         self.q1: Optional[tf.Tensor] = None
         self.q2: Optional[tf.Tensor] = None
         self.q1_p: Optional[tf.Tensor] = None

Mutant 284

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -43,7 +43,7 @@
         self.policy_memory_out: Optional[tf.Tensor] = None
         self.value_memory_in: Optional[tf.Tensor] = None
         self.value_memory_out: Optional[tf.Tensor] = None
-        self.q1: Optional[tf.Tensor] = None
+        self.q1: Optional[tf.Tensor] = ""
         self.q2: Optional[tf.Tensor] = None
         self.q1_p: Optional[tf.Tensor] = None
         self.q2_p: Optional[tf.Tensor] = None

Mutant 285

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -44,7 +44,7 @@
         self.value_memory_in: Optional[tf.Tensor] = None
         self.value_memory_out: Optional[tf.Tensor] = None
         self.q1: Optional[tf.Tensor] = None
-        self.q2: Optional[tf.Tensor] = None
+        self.q2: Optional[tf.Tensor] = ""
         self.q1_p: Optional[tf.Tensor] = None
         self.q2_p: Optional[tf.Tensor] = None
         self.q1_memory_in: Optional[tf.Tensor] = None

Mutant 286

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -45,7 +45,7 @@
         self.value_memory_out: Optional[tf.Tensor] = None
         self.q1: Optional[tf.Tensor] = None
         self.q2: Optional[tf.Tensor] = None
-        self.q1_p: Optional[tf.Tensor] = None
+        self.q1_p: Optional[tf.Tensor] = ""
         self.q2_p: Optional[tf.Tensor] = None
         self.q1_memory_in: Optional[tf.Tensor] = None
         self.q2_memory_in: Optional[tf.Tensor] = None

Mutant 287

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -46,7 +46,7 @@
         self.q1: Optional[tf.Tensor] = None
         self.q2: Optional[tf.Tensor] = None
         self.q1_p: Optional[tf.Tensor] = None
-        self.q2_p: Optional[tf.Tensor] = None
+        self.q2_p: Optional[tf.Tensor] = ""
         self.q1_memory_in: Optional[tf.Tensor] = None
         self.q2_memory_in: Optional[tf.Tensor] = None
         self.q1_memory_out: Optional[tf.Tensor] = None

Mutant 288

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -47,7 +47,7 @@
         self.q2: Optional[tf.Tensor] = None
         self.q1_p: Optional[tf.Tensor] = None
         self.q2_p: Optional[tf.Tensor] = None
-        self.q1_memory_in: Optional[tf.Tensor] = None
+        self.q1_memory_in: Optional[tf.Tensor] = ""
         self.q2_memory_in: Optional[tf.Tensor] = None
         self.q1_memory_out: Optional[tf.Tensor] = None
         self.q2_memory_out: Optional[tf.Tensor] = None

Mutant 289

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -48,7 +48,7 @@
         self.q1_p: Optional[tf.Tensor] = None
         self.q2_p: Optional[tf.Tensor] = None
         self.q1_memory_in: Optional[tf.Tensor] = None
-        self.q2_memory_in: Optional[tf.Tensor] = None
+        self.q2_memory_in: Optional[tf.Tensor] = ""
         self.q1_memory_out: Optional[tf.Tensor] = None
         self.q2_memory_out: Optional[tf.Tensor] = None
         self.prev_action: Optional[tf.Tensor] = None

Mutant 290

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -49,7 +49,7 @@
         self.q2_p: Optional[tf.Tensor] = None
         self.q1_memory_in: Optional[tf.Tensor] = None
         self.q2_memory_in: Optional[tf.Tensor] = None
-        self.q1_memory_out: Optional[tf.Tensor] = None
+        self.q1_memory_out: Optional[tf.Tensor] = ""
         self.q2_memory_out: Optional[tf.Tensor] = None
         self.prev_action: Optional[tf.Tensor] = None
         self.action_masks: Optional[tf.Tensor] = None

Mutant 291

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -50,7 +50,7 @@
         self.q1_memory_in: Optional[tf.Tensor] = None
         self.q2_memory_in: Optional[tf.Tensor] = None
         self.q1_memory_out: Optional[tf.Tensor] = None
-        self.q2_memory_out: Optional[tf.Tensor] = None
+        self.q2_memory_out: Optional[tf.Tensor] = ""
         self.prev_action: Optional[tf.Tensor] = None
         self.action_masks: Optional[tf.Tensor] = None
         self.external_action_in: Optional[tf.Tensor] = None

Mutant 292

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -51,7 +51,7 @@
         self.q2_memory_in: Optional[tf.Tensor] = None
         self.q1_memory_out: Optional[tf.Tensor] = None
         self.q2_memory_out: Optional[tf.Tensor] = None
-        self.prev_action: Optional[tf.Tensor] = None
+        self.prev_action: Optional[tf.Tensor] = ""
         self.action_masks: Optional[tf.Tensor] = None
         self.external_action_in: Optional[tf.Tensor] = None
         self.log_sigma_sq: Optional[tf.Tensor] = None

Mutant 293

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -52,7 +52,7 @@
         self.q1_memory_out: Optional[tf.Tensor] = None
         self.q2_memory_out: Optional[tf.Tensor] = None
         self.prev_action: Optional[tf.Tensor] = None
-        self.action_masks: Optional[tf.Tensor] = None
+        self.action_masks: Optional[tf.Tensor] = ""
         self.external_action_in: Optional[tf.Tensor] = None
         self.log_sigma_sq: Optional[tf.Tensor] = None
         self.entropy: Optional[tf.Tensor] = None

Mutant 294

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -53,7 +53,7 @@
         self.q2_memory_out: Optional[tf.Tensor] = None
         self.prev_action: Optional[tf.Tensor] = None
         self.action_masks: Optional[tf.Tensor] = None
-        self.external_action_in: Optional[tf.Tensor] = None
+        self.external_action_in: Optional[tf.Tensor] = ""
         self.log_sigma_sq: Optional[tf.Tensor] = None
         self.entropy: Optional[tf.Tensor] = None
         self.deterministic_output: Optional[tf.Tensor] = None

Mutant 295

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -54,7 +54,7 @@
         self.prev_action: Optional[tf.Tensor] = None
         self.action_masks: Optional[tf.Tensor] = None
         self.external_action_in: Optional[tf.Tensor] = None
-        self.log_sigma_sq: Optional[tf.Tensor] = None
+        self.log_sigma_sq: Optional[tf.Tensor] = ""
         self.entropy: Optional[tf.Tensor] = None
         self.deterministic_output: Optional[tf.Tensor] = None
         self.normalized_logprobs: Optional[tf.Tensor] = None

Mutant 296

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -55,7 +55,7 @@
         self.action_masks: Optional[tf.Tensor] = None
         self.external_action_in: Optional[tf.Tensor] = None
         self.log_sigma_sq: Optional[tf.Tensor] = None
-        self.entropy: Optional[tf.Tensor] = None
+        self.entropy: Optional[tf.Tensor] = ""
         self.deterministic_output: Optional[tf.Tensor] = None
         self.normalized_logprobs: Optional[tf.Tensor] = None
         self.action_probs: Optional[tf.Tensor] = None

Mutant 297

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -56,7 +56,7 @@
         self.external_action_in: Optional[tf.Tensor] = None
         self.log_sigma_sq: Optional[tf.Tensor] = None
         self.entropy: Optional[tf.Tensor] = None
-        self.deterministic_output: Optional[tf.Tensor] = None
+        self.deterministic_output: Optional[tf.Tensor] = ""
         self.normalized_logprobs: Optional[tf.Tensor] = None
         self.action_probs: Optional[tf.Tensor] = None
         self.output_oh: Optional[tf.Tensor] = None

Mutant 298

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -57,7 +57,7 @@
         self.log_sigma_sq: Optional[tf.Tensor] = None
         self.entropy: Optional[tf.Tensor] = None
         self.deterministic_output: Optional[tf.Tensor] = None
-        self.normalized_logprobs: Optional[tf.Tensor] = None
+        self.normalized_logprobs: Optional[tf.Tensor] = ""
         self.action_probs: Optional[tf.Tensor] = None
         self.output_oh: Optional[tf.Tensor] = None
         self.output_pre: Optional[tf.Tensor] = None

Mutant 299

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -58,7 +58,7 @@
         self.entropy: Optional[tf.Tensor] = None
         self.deterministic_output: Optional[tf.Tensor] = None
         self.normalized_logprobs: Optional[tf.Tensor] = None
-        self.action_probs: Optional[tf.Tensor] = None
+        self.action_probs: Optional[tf.Tensor] = ""
         self.output_oh: Optional[tf.Tensor] = None
         self.output_pre: Optional[tf.Tensor] = None
 

Mutant 300

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -59,7 +59,7 @@
         self.deterministic_output: Optional[tf.Tensor] = None
         self.normalized_logprobs: Optional[tf.Tensor] = None
         self.action_probs: Optional[tf.Tensor] = None
-        self.output_oh: Optional[tf.Tensor] = None
+        self.output_oh: Optional[tf.Tensor] = ""
         self.output_pre: Optional[tf.Tensor] = None
 
         self.value_vars = None

Mutant 301

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -60,7 +60,7 @@
         self.normalized_logprobs: Optional[tf.Tensor] = None
         self.action_probs: Optional[tf.Tensor] = None
         self.output_oh: Optional[tf.Tensor] = None
-        self.output_pre: Optional[tf.Tensor] = None
+        self.output_pre: Optional[tf.Tensor] = ""
 
         self.value_vars = None
         self.q_vars = None

Mutant 302

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -62,7 +62,7 @@
         self.output_oh: Optional[tf.Tensor] = None
         self.output_pre: Optional[tf.Tensor] = None
 
-        self.value_vars = None
+        self.value_vars = ""
         self.q_vars = None
         self.critic_vars = None
         self.policy_vars = None

Mutant 303

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -63,7 +63,7 @@
         self.output_pre: Optional[tf.Tensor] = None
 
         self.value_vars = None
-        self.q_vars = None
+        self.q_vars = ""
         self.critic_vars = None
         self.policy_vars = None
 

Mutant 304

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -64,7 +64,7 @@
 
         self.value_vars = None
         self.q_vars = None
-        self.critic_vars = None
+        self.critic_vars = ""
         self.policy_vars = None
 
         self.q1_heads: Dict[str, tf.Tensor] = None

Mutant 305

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -65,7 +65,7 @@
         self.value_vars = None
         self.q_vars = None
         self.critic_vars = None
-        self.policy_vars = None
+        self.policy_vars = ""
 
         self.q1_heads: Dict[str, tf.Tensor] = None
         self.q2_heads: Dict[str, tf.Tensor] = None

Mutant 306

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -67,7 +67,7 @@
         self.critic_vars = None
         self.policy_vars = None
 
-        self.q1_heads: Dict[str, tf.Tensor] = None
+        self.q1_heads: Dict[str, tf.Tensor] = ""
         self.q2_heads: Dict[str, tf.Tensor] = None
         self.q1_pheads: Dict[str, tf.Tensor] = None
         self.q2_pheads: Dict[str, tf.Tensor] = None

Mutant 307

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -68,7 +68,7 @@
         self.policy_vars = None
 
         self.q1_heads: Dict[str, tf.Tensor] = None
-        self.q2_heads: Dict[str, tf.Tensor] = None
+        self.q2_heads: Dict[str, tf.Tensor] = ""
         self.q1_pheads: Dict[str, tf.Tensor] = None
         self.q2_pheads: Dict[str, tf.Tensor] = None
 

Mutant 308

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -69,7 +69,7 @@
 
         self.q1_heads: Dict[str, tf.Tensor] = None
         self.q2_heads: Dict[str, tf.Tensor] = None
-        self.q1_pheads: Dict[str, tf.Tensor] = None
+        self.q1_pheads: Dict[str, tf.Tensor] = ""
         self.q2_pheads: Dict[str, tf.Tensor] = None
 
         self.policy = policy

Mutant 309

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -70,7 +70,7 @@
         self.q1_heads: Dict[str, tf.Tensor] = None
         self.q2_heads: Dict[str, tf.Tensor] = None
         self.q1_pheads: Dict[str, tf.Tensor] = None
-        self.q2_pheads: Dict[str, tf.Tensor] = None
+        self.q2_pheads: Dict[str, tf.Tensor] = ""
 
         self.policy = policy
 

Mutant 312

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -84,7 +84,7 @@
         """
         if not scope_1:
             return scope_2
-        if not scope_2:
+        if  scope_2:
             return scope_1
         else:
             return "/".join(filter(None, [scope_1, scope_2]))

Mutant 313

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -87,7 +87,7 @@
         if not scope_2:
             return scope_1
         else:
-            return "/".join(filter(None, [scope_1, scope_2]))
+            return "XX/XX".join(filter(None, [scope_1, scope_2]))
 
     def create_value_heads(self, stream_names, hidden_input):
         """

Mutant 318

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -102,7 +102,7 @@
         for name in stream_names:
             value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
             self.value_heads[name] = value
-        self.value = tf.reduce_mean(list(self.value_heads.values()), 0)
+        self.value = tf.reduce_mean(list(self.value_heads.values()), 1)
 
     def _create_cc_critic(self, hidden_value, scope, create_qs=True):
         """

Mutant 321

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -108,7 +108,7 @@
         """
         Creates just the critic network
         """
-        scope = self.join_scopes(scope, "critic")
+        scope = self.join_scopes(scope, "XXcriticXX")
         self.create_sac_value_head(
             self.stream_names,
             hidden_value,

Mutant 325

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -119,7 +119,7 @@
         self.external_action_in = tf.placeholder(
             shape=[None, self.policy.act_size[0]],
             dtype=tf.float32,
-            name="external_action_in",
+            name="XXexternal_action_inXX",
         )
         self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
         if create_qs:

Mutant 328

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -123,7 +123,7 @@
         )
         self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
         if create_qs:
-            hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
+            hidden_q = tf.concat([hidden_value, self.external_action_in], axis=+1)
             hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
             self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
                 self.stream_names,

Mutant 331

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -124,7 +124,7 @@
         self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
         if create_qs:
             hidden_q = tf.concat([hidden_value, self.external_action_in], axis=-1)
-            hidden_qp = tf.concat([hidden_value, self.policy.output], axis=-1)
+            hidden_qp = tf.concat([hidden_value, self.policy.output], axis=+1)
             self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(
                 self.stream_names,
                 hidden_q,

Mutant 337

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -140,7 +140,7 @@
                 self.join_scopes(scope, "q"),
                 reuse=True,
             )
-            self.q_vars = self.get_vars(self.join_scopes(scope, "q"))
+            self.q_vars = self.get_vars(self.join_scopes(scope, "XXqXX"))
         self.critic_vars = self.get_vars(scope)
 
     def _create_dc_critic(self, hidden_value, scope, create_qs=True):

Mutant 343

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -153,7 +153,7 @@
             hidden_value,
             self.num_layers,
             self.h_size,
-            self.join_scopes(scope, "value"),
+            self.join_scopes(scope, "XXvalueXX"),
         )
 
         self.value_vars = self.get_vars("/".join([scope, "value"]))

Mutant 344

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -156,7 +156,7 @@
             self.join_scopes(scope, "value"),
         )
 
-        self.value_vars = self.get_vars("/".join([scope, "value"]))
+        self.value_vars = self.get_vars("XX/XX".join([scope, "value"]))
 
         if create_qs:
             self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(

Mutant 345

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -156,7 +156,7 @@
             self.join_scopes(scope, "value"),
         )
 
-        self.value_vars = self.get_vars("/".join([scope, "value"]))
+        self.value_vars = self.get_vars("/".join([scope, "XXvalueXX"]))
 
         if create_qs:
             self.q1_heads, self.q2_heads, self.q1, self.q2 = self.create_q_heads(

Mutant 352

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -195,7 +195,7 @@
         """
         with tf.variable_scope(scope):
             value_hidden = ModelUtils.create_vector_observation_encoder(
-                hidden_input, h_size, self.activ_fn, num_layers, "encoder", False
+                hidden_input, h_size, self.activ_fn, num_layers, "XXencoderXX", False
             )
             if self.use_recurrent:
                 value_hidden, memory_out = ModelUtils.create_recurrent_encoder(

Mutant 354

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -202,7 +202,7 @@
                     value_hidden,
                     self.value_memory_in,
                     self.sequence_length_ph,
-                    name="lstm_value",
+                    name="XXlstm_valueXX",
                 )
                 self.value_memory_out = memory_out
             self.create_value_heads(stream_names, value_hidden)

Mutant 356

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -230,7 +230,7 @@
         :param reuse: Whether or not to reuse variables. Useful for creating Q of policy.
         :param num_outputs: Number of outputs of each Q function. If discrete, equal to number of actions.
         """
-        with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
+        with tf.variable_scope(self.join_scopes(scope, "XXq1_encodingXX"), reuse=reuse):
             q1_hidden = ModelUtils.create_vector_observation_encoder(
                 hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
             )

Mutant 357

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -232,7 +232,7 @@
         """
         with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
             q1_hidden = ModelUtils.create_vector_observation_encoder(
-                hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
+                hidden_input, h_size, self.activ_fn, num_layers, "XXq1_encoderXX", reuse
             )
             if self.use_recurrent:
                 q1_hidden, memory_out = ModelUtils.create_recurrent_encoder(

Mutant 358

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -239,7 +239,7 @@
                     q1_hidden,
                     self.q1_memory_in,
                     self.sequence_length_ph,
-                    name="lstm_q1",
+                    name="XXlstm_q1XX",
                 )
                 self.q1_memory_out = memory_out
 

Mutant 365

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -249,7 +249,7 @@
                 q1_heads[name] = _q1
 
             q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
-        with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
+        with tf.variable_scope(self.join_scopes(scope, "XXq2_encodingXX"), reuse=reuse):
             q2_hidden = ModelUtils.create_vector_observation_encoder(
                 hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
             )

Mutant 366

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -251,7 +251,7 @@
             q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
         with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
             q2_hidden = ModelUtils.create_vector_observation_encoder(
-                hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
+                hidden_input, h_size, self.activ_fn, num_layers, "XXq2_encoderXX", reuse
             )
             if self.use_recurrent:
                 q2_hidden, memory_out = ModelUtils.create_recurrent_encoder(

Mutant 367

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -258,7 +258,7 @@
                     q2_hidden,
                     self.q2_memory_in,
                     self.sequence_length_ph,
-                    name="lstm_q2",
+                    name="XXlstm_q2XX",
                 )
                 self.q2_memory_out = memory_out
 

Mutant 372

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -267,7 +267,7 @@
                 _q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
                 q2_heads[name] = _q2
 
-            q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
+            q2 = tf.reduce_mean(list(q2_heads.values()), axis=1)
 
         return q1_heads, q2_heads, q1, q2
 

Mutant 373

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -267,7 +267,7 @@
                 _q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
                 q2_heads[name] = _q2
 
-            q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
+            q2 = None
 
         return q1_heads, q2_heads, q1, q2
 

Mutant 374

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -282,7 +282,7 @@
         self,
         policy,
         m_size=None,
-        h_size=128,
+        h_size=129,
         normalize=False,
         use_recurrent=False,
         num_layers=2,

Mutant 375

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -283,7 +283,7 @@
         policy,
         m_size=None,
         h_size=128,
-        normalize=False,
+        normalize=True,
         use_recurrent=False,
         num_layers=2,
         stream_names=None,

Mutant 376

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -284,7 +284,7 @@
         m_size=None,
         h_size=128,
         normalize=False,
-        use_recurrent=False,
+        use_recurrent=True,
         num_layers=2,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,

Mutant 377

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -285,7 +285,7 @@
         h_size=128,
         normalize=False,
         use_recurrent=False,
-        num_layers=2,
+        num_layers=3,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,
     ):

Mutant 379

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -317,7 +317,7 @@
                 )
             else:
                 self.processed_vector_in = self.vector_in
-                self.update_normalization_op = None
+                self.update_normalization_op = ""
 
             if self.policy.use_recurrent:
                 self.memory_in = tf.placeholder(

Mutant 380

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -321,7 +321,7 @@
 
             if self.policy.use_recurrent:
                 self.memory_in = tf.placeholder(
-                    shape=[None, m_size], dtype=tf.float32, name="target_recurrent_in"
+                    shape=[None, m_size], dtype=tf.float32, name="XXtarget_recurrent_inXX"
                 )
                 self.value_memory_in = self.memory_in
             hidden_streams = ModelUtils.create_observation_streams(

Mutant 389

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -339,7 +339,7 @@
             self._create_dc_critic(hidden_streams[0], TARGET_SCOPE, create_qs=False)
         if self.use_recurrent:
             self.memory_out = tf.concat(
-                self.value_memory_out, axis=1
+                self.value_memory_out, axis=2
             )  # Needed for Barracuda to work
 
     def copy_normalization(self, mean, variance, steps):

Mutant 390

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -367,7 +367,7 @@
         self,
         policy,
         m_size=None,
-        h_size=128,
+        h_size=129,
         normalize=False,
         use_recurrent=False,
         num_layers=2,

Mutant 391

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -368,7 +368,7 @@
         policy,
         m_size=None,
         h_size=128,
-        normalize=False,
+        normalize=True,
         use_recurrent=False,
         num_layers=2,
         stream_names=None,

Mutant 392

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -369,7 +369,7 @@
         m_size=None,
         h_size=128,
         normalize=False,
-        use_recurrent=False,
+        use_recurrent=True,
         num_layers=2,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,

Mutant 393

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -370,7 +370,7 @@
         h_size=128,
         normalize=False,
         use_recurrent=False,
-        num_layers=2,
+        num_layers=3,
         stream_names=None,
         vis_encode_type=EncoderType.SIMPLE,
     ):

Mutant 401

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -407,7 +407,7 @@
         :param m_size: the total size of the memory.
         """
         self.memory_in = tf.placeholder(
-            shape=[None, m_size * 3], dtype=tf.float32, name="value_recurrent_in"
+            shape=[None, m_size * 3], dtype=tf.float32, name="XXvalue_recurrent_inXX"
         )
 
         # Re-break-up for each network

Mutant 415

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -418,7 +418,7 @@
             _start = input_size // num_mems * i
             _end = input_size // num_mems * (i + 1)
             mem_ins.append(self.memory_in[:, _start:_end])
-        self.value_memory_in = mem_ins[0]
+        self.value_memory_in = mem_ins[1]
         self.q1_memory_in = mem_ins[1]
         self.q2_memory_in = mem_ins[2]
 

Mutant 417

--- ml-agents/mlagents/trainers/sac/network.py
+++ ml-agents/mlagents/trainers/sac/network.py
@@ -419,7 +419,7 @@
             _end = input_size // num_mems * (i + 1)
             mem_ins.append(self.memory_in[:, _start:_end])
         self.value_memory_in = mem_ins[0]
-        self.q1_memory_in = mem_ins[1]
+        self.q1_memory_in = mem_ins[2]
         self.q2_memory_in = mem_ins[2]
 
     def _create_observation_in(self, vis_encode_type):