diff --git a/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py b/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
index f149f64fbb..356443a72a 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
@@ -80,7 +80,7 @@ def test_reward_decreases(
     init_reward_expert = gail_rp.evaluate(buffer_expert)[0]
     init_reward_policy = gail_rp.evaluate(buffer_policy)[0]
 
-    for _ in range(10):
+    for _ in range(20):
         gail_rp.update(buffer_policy)
         reward_expert = gail_rp.evaluate(buffer_expert)[0]
         reward_policy = gail_rp.evaluate(buffer_policy)[0]
diff --git a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
index 88c84dcd8c..80ff5b508b 100644
--- a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
+++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
@@ -103,7 +103,7 @@ def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
         )
         self._estimator = torch.nn.Sequential(
-            linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()
+            linear_layer(estimator_input_size, 1, kernel_gain=0.2), torch.nn.Sigmoid()
        )
 
     def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:
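
Note on the kernel_gain=0.2 change: the sketch below is a minimal, standalone stand-in for the GAIL discriminator head this hunk touches, written in plain PyTorch instead of ML-Agents' linear_layer helper. The make_estimator name, the Xavier-uniform initialization, and the treatment of the gain as a multiplier on the freshly initialized weights are assumptions for illustration only; the intent is that shrinking the final layer's weights keeps the sigmoid output near 0.5 at the start of training, so the GAIL reward signal begins roughly neutral.

# Hedged sketch of a GAIL-style discriminator head (not the ML-Agents implementation).
import torch

def make_estimator(estimator_input_size: int, gain: float = 0.2) -> torch.nn.Sequential:
    # Hypothetical helper mirroring linear_layer(estimator_input_size, 1, kernel_gain=0.2).
    final = torch.nn.Linear(estimator_input_size, 1)
    torch.nn.init.xavier_uniform_(final.weight)  # assumed default init scheme
    final.weight.data *= gain                    # scale initial weights down by the gain
    torch.nn.init.zeros_(final.bias)
    return torch.nn.Sequential(final, torch.nn.Sigmoid())

if __name__ == "__main__":
    # With small initial weights the logits stay close to zero, so the
    # discriminator outputs hover around 0.5 for a random batch of encodings.
    est = make_estimator(64)
    print(est(torch.randn(8, 64)))

With an unscaled final layer the discriminator can start out confidently separating expert and policy batches, which is one plausible reason the accompanying test needed more update steps (range(20) instead of range(10)) to observe the expected reward trend.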