From 4cb3f990ac9d7a7219bc24d7ef46ffa6eb1b2da6 Mon Sep 17 00:00:00 2001 From: yuanmingqi Date: Thu, 12 Oct 2023 09:22:40 -0400 Subject: [PATCH 1/2] update daac_procgen apps. --- .gitignore | 1 + rllte/hub/applications/procgen.py | 30 +++++++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index f797b72d..654d3640 100644 --- a/.gitignore +++ b/.gitignore @@ -33,5 +33,6 @@ _build/ /train.py /checkpoints_in /rllte/copilot/g4f +/misc # find . | grep -E "(/__pycache__$|\.pyc$|\.pyo$)" | xargs rm -rf \ No newline at end of file diff --git a/rllte/hub/applications/procgen.py b/rllte/hub/applications/procgen.py index 146700d4..c6b5b00e 100644 --- a/rllte/hub/applications/procgen.py +++ b/rllte/hub/applications/procgen.py @@ -53,6 +53,7 @@ def __init__(self, agent: str = "PPO", env_id: str = "bigfish", seed: int = 1, d num_levels=200, start_level=0, distribution_mode="easy", + asynchronous=False ) eval_envs = make_envpool_procgen_env( env_id=env_id, @@ -90,6 +91,29 @@ def __init__(self, agent: str = "PPO", env_id: str = "bigfish", seed: int = 1, d init_fn="xavier_uniform", ) elif agent == "DAAC": + # Best hyperparameters for DAAC reported in + # https://github.com/rraileanu/idaac/blob/main/hyperparams.py + if env_id in ['plunder', 'chaser']: + value_epochs = 1 + else: + value_epochs = 9 + + if env_id in ['miner', 'bigfish', 'dodgeball']: + value_freq = 32 + elif env_id == 'plunder': + value_freq = 8 + else: + value_freq = 1 + + if env_id == 'plunder': + adv_coef = 0.3 + elif env_id == 'chaser': + adv_coef = 0.15 + elif env_id in ['climber', 'bigfish']: + adv_coef = 0.05 + else: + adv_coef = 0.25 + self.agent = DAAC( # type: ignore[assignment] env=envs, eval_env=eval_envs, @@ -104,11 +128,11 @@ def __init__(self, agent: str = "PPO", env_id: str = "bigfish", seed: int = 1, d clip_range=0.2, clip_range_vf=0.2, policy_epochs=1, - value_epochs=9, - value_freq=3, + value_epochs=value_epochs, + value_freq=value_freq, vf_coef=0.5, ent_coef=0.01, - adv_coef=0.05, + adv_coef=adv_coef, max_grad_norm=0.5, init_fn="xavier_uniform", ) From 446f32e10ffa1b40c924ea09fed11c01b080fd70 Mon Sep 17 00:00:00 2001 From: yuanmingqi Date: Mon, 16 Oct 2023 01:38:15 -0400 Subject: [PATCH 2/2] update decoupled policy --- rllte/env/procgen/__init__.py | 6 +++--- rllte/xploit/policy/on_policy_decoupled_actor_critic.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rllte/env/procgen/__init__.py b/rllte/env/procgen/__init__.py index faf0ee4a..c6e6b063 100644 --- a/rllte/env/procgen/__init__.py +++ b/rllte/env/procgen/__init__.py @@ -27,7 +27,7 @@ import gymnasium as gym import numpy as np -from gymnasium.spaces import Box +from gymnasium.spaces import Box, Discrete from gymnasium.wrappers import NormalizeReward, RecordEpisodeStatistics, TransformObservation, TransformReward from procgen import ProcgenEnv @@ -53,7 +53,7 @@ def __init__(self, env: gym.Env, num_envs: int) -> None: shape=[3, 64, 64], dtype=env.observation_space["rgb"].dtype, ) - self.single_action_space = env.action_space + self.single_action_space = Discrete(env.action_space.n) self.is_vector_env = True self.num_envs = num_envs @@ -169,7 +169,7 @@ def make_procgen_env( num_levels=num_levels, start_level=start_level, distribution_mode=distribution_mode, - rand_seed=seed, + # rand_seed=seed, ) envs = AdapterEnv(envs, num_envs) envs = TransformObservation(envs, lambda obs: obs["rgb"].transpose(0, 3, 1, 2)) diff --git a/rllte/xploit/policy/on_policy_decoupled_actor_critic.py b/rllte/xploit/policy/on_policy_decoupled_actor_critic.py index 121b0ca6..e93995b7 100644 --- a/rllte/xploit/policy/on_policy_decoupled_actor_critic.py +++ b/rllte/xploit/policy/on_policy_decoupled_actor_critic.py @@ -147,7 +147,7 @@ def freeze(self, encoder: nn.Module, dist: Distribution) -> None: # initialize parameters self.apply(self.init_fn) # synchronize the parameters of actor_encoder and critic_encoder - self.critic_encoder.load_state_dict(self.actor_encoder.state_dict()) + # self.critic_encoder.load_state_dict(self.actor_encoder.state_dict()) # build optimizers self.actor_params = itertools.chain(self.actor_encoder.parameters(), self.actor.parameters(), self.gae.parameters()) self.critic_params = itertools.chain(self.critic_encoder.parameters(), self.critic.parameters())