diff --git a/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py b/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
index f149f64fbb..356443a72a 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
@@ -80,7 +80,7 @@ def test_reward_decreases(
     init_reward_expert = gail_rp.evaluate(buffer_expert)[0]
     init_reward_policy = gail_rp.evaluate(buffer_policy)[0]
 
-    for _ in range(10):
+    for _ in range(20):
         gail_rp.update(buffer_policy)
         reward_expert = gail_rp.evaluate(buffer_expert)[0]
         reward_policy = gail_rp.evaluate(buffer_policy)[0]
diff --git a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
index 88c84dcd8c..80ff5b508b 100644
--- a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
+++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
@@ -103,7 +103,7 @@ def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
         )
         self._estimator = torch.nn.Sequential(
-            linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()
+            linear_layer(estimator_input_size, 1, kernel_gain=0.2), torch.nn.Sigmoid()
        )
 
     def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:
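
Note on the kernel_gain=0.2 change: the sketch below is a minimal, standalone stand-in for the GAIL discriminator head this hunk touches, written in plain PyTorch instead of ML-Agents' linear_layer helper. The make_estimator name, the Xavier-uniform initialization, and the treatment of the gain as a multiplier on the freshly initialized weights are assumptions for illustration only; the intent is that shrinking the final layer's weights keeps the sigmoid output near 0.5 at the start of training, so the GAIL reward signal begins roughly neutral.

# Hedged sketch of a GAIL-style discriminator head (not the ML-Agents implementation).
import torch

def make_estimator(estimator_input_size: int, gain: float = 0.2) -> torch.nn.Sequential:
    # Hypothetical helper mirroring linear_layer(estimator_input_size, 1, kernel_gain=0.2).
    final = torch.nn.Linear(estimator_input_size, 1)
    torch.nn.init.xavier_uniform_(final.weight)  # assumed default init scheme
    final.weight.data *= gain                    # scale initial weights down by the gain
    torch.nn.init.zeros_(final.bias)
    return torch.nn.Sequential(final, torch.nn.Sigmoid())

if __name__ == "__main__":
    # With small initial weights the logits stay close to zero, so the
    # discriminator outputs hover around 0.5 for a random batch of encodings.
    est = make_estimator(64)
    print(est(torch.randn(8, 64)))

With an unscaled final layer the discriminator can start out confidently separating expert and policy batches, which is one plausible reason the accompanying test needed more update steps (range(20) instead of range(10)) to observe the expected reward trend.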