Unity-Technologies · vincentpierre · Jan 21, 2021 · Jan 19, 2021 · Jan 19, 2021
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
@@ -26,6 +26,7 @@ TensorBoard. Thanks to @brccabral for the contribution! (#4816)
 - Fix a compile warning about using an obsolete enum in `GrpcExtensions.cs`. (#4812)
 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - Fixed a bug that would cause an exception when `RunOptions` was deserialized via `pickle`. (#4842)
+- Fixed the entropy computation for continuous actions, which used the standard deviation where the variance is required. (#4869)
 
 
 ## [1.7.2-preview] - 2020-12-22

diff --git a/ml-agents/mlagents/trainers/tests/torch/test_hybrid.py b/ml-agents/mlagents/trainers/tests/torch/test_hybrid.py
@@ -23,17 +23,18 @@ def test_hybrid_ppo(action_size):
     env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
     new_network_settings = attr.evolve(PPO_TORCH_CONFIG.network_settings)
     new_hyperparams = attr.evolve(
-        PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=1024
+        PPO_TORCH_CONFIG.hyperparameters,
+        batch_size=64,
+        buffer_size=1024,
+        learning_rate=1e-3,
     )
     config = attr.evolve(
         PPO_TORCH_CONFIG,
         hyperparameters=new_hyperparams,
         network_settings=new_network_settings,
         max_steps=10000,
     )
-    check_environment_trains(
-        env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1212
-    )
+    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
 
 
 @pytest.mark.parametrize("num_visual", [1, 2])

diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py
@@ -74,7 +74,7 @@ def pdf(self, value):
 
     def entropy(self):
         return torch.mean(
-            0.5 * torch.log(2 * math.pi * math.e * self.std + EPSILON),
+            0.5 * torch.log(2 * math.pi * math.e * self.std ** 2 + EPSILON),
             dim=1,
             keepdim=True,
         )  # Use equivalent behavior to TF