Turn down gain on GAIL discriminator output (#4762) (#4772)

Unity-Technologies · Dec 18, 2020 · 30ed097
1 parent a2b81c3
commit 30ed097
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 2 deletions.
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py b/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
@@ -80,7 +80,7 @@ def test_reward_decreases(
     init_reward_expert = gail_rp.evaluate(buffer_expert)[0]
     init_reward_policy = gail_rp.evaluate(buffer_policy)[0]
 
-    for _ in range(10):
+    for _ in range(20):
         gail_rp.update(buffer_policy)
         reward_expert = gail_rp.evaluate(buffer_expert)[0]
         reward_policy = gail_rp.evaluate(buffer_policy)[0]

diff --git a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
@@ -103,7 +103,7 @@ def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
             )
 
         self._estimator = torch.nn.Sequential(
-            linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()
+            linear_layer(estimator_input_size, 1, kernel_gain=0.2), torch.nn.Sigmoid()
         )
 
     def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor: