diff --git a/algorithms/a2c/agent.py b/algorithms/a2c/agent.py
index 46a93e6b..0af29f94 100644
--- a/algorithms/a2c/agent.py
+++ b/algorithms/a2c/agent.py
@@ -16,12 +16,12 @@
 import torch.nn.functional as F
 import wandb
 
-from algorithms.common.abstract.agent import Agent as AbstractAgent
+from algorithms.common.abstract.agent import Agent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(AbstractAgent):
+class A2CAgent(Agent):
     """1-Step Advantage Actor-Critic interacting with environment.
 
     Attributes:
@@ -55,7 +55,7 @@ def __init__(
             optims (tuple): optimizers for actor and critic
 
         """
-        AbstractAgent.__init__(self, env, args)
+        Agent.__init__(self, env, args)
 
         self.actor, self.critic = models
         self.actor_optimizer, self.critic_optimizer = optims
@@ -158,7 +158,7 @@ def save_params(self, n_episode: int):
             "critic_optim_state_dict": self.critic_optimizer.state_dict(),
         }
 
-        AbstractAgent.save_params(self, params, n_episode)
+        Agent.save_params(self, params, n_episode)
 
     def write_log(self, i: int, score: int, policy_loss: float, value_loss: float):
         total_loss = policy_loss + value_loss
diff --git a/algorithms/bc/ddpg_agent.py b/algorithms/bc/ddpg_agent.py
index 7c3e69e8..a51cf3d8 100644
--- a/algorithms/bc/ddpg_agent.py
+++ b/algorithms/bc/ddpg_agent.py
@@ -16,20 +16,20 @@
 import torch.nn.functional as F
 import wandb
 
-from algorithms.common.abstract.her import HER as AbstractHER
+from algorithms.common.abstract.her import HER
 from algorithms.common.buffer.replay_buffer import ReplayBuffer
 import algorithms.common.helper_functions as common_utils
 from algorithms.common.noise import OUNoise
-from algorithms.ddpg.agent import Agent as DDPGAgent
+from algorithms.ddpg.agent import DDPGAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(DDPGAgent):
+class BCDDPGAgent(DDPGAgent):
     """BC with DDPG agent interacting with environment.
 
     Attributes:
-        HER (AbstractHER): hinsight experience replay
+        her (HER): hinsight experience replay
         transitions_epi (list): transitions per episode (for HER)
         desired_state (np.ndarray): desired state of current episode
         memory (ReplayBuffer): replay memory
@@ -47,14 +47,14 @@ def __init__(
         models: tuple,
         optims: tuple,
         noise: OUNoise,
-        HER: AbstractHER,
+        her: HER,
     ):
         """Initialization.
 
         Args:
-            HER (AbstractHER): hinsight experience replay
+            her (HER): hinsight experience replay
 
         """
-        self.HER = HER
+        self.her = her
         DDPGAgent.__init__(self, env, args, hyper_params, models, optims, noise)
 
     # pylint: disable=attribute-defined-outside-init
@@ -66,7 +66,6 @@ def _initialize(self):
 
         # HER
         if self.hyper_params["USE_HER"]:
-            self.her = self.HER()
             if self.hyper_params["DESIRED_STATES_FROM_DEMO"]:
                 self.her.fetch_desired_states_from_demo(demo)
 
diff --git a/algorithms/bc/sac_agent.py b/algorithms/bc/sac_agent.py
index 1d1d4e42..d77b6e04 100644
--- a/algorithms/bc/sac_agent.py
+++ b/algorithms/bc/sac_agent.py
@@ -18,19 +18,19 @@
 import torch.nn.functional as F
 import wandb
 
-from algorithms.common.abstract.her import HER as AbstractHER
+from algorithms.common.abstract.her import HER
 from algorithms.common.buffer.replay_buffer import ReplayBuffer
 import algorithms.common.helper_functions as common_utils
-from algorithms.sac.agent import Agent as SACAgent
+from algorithms.sac.agent import SACAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(SACAgent):
+class BCSACAgent(SACAgent):
     """BC with SAC agent interacting with environment.
 
     Attrtibutes:
-        HER (AbstractHER): hinsight experience replay
+        her (HER): hinsight experience replay
         transitions_epi (list): transitions per episode (for HER)
         desired_state (np.ndarray): desired state of current episode
         memory (ReplayBuffer): replay memory
@@ -48,14 +48,14 @@ def __init__(
         models: tuple,
         optims: tuple,
         target_entropy: float,
-        HER: AbstractHER,
+        her: HER,
     ):
         """Initialization.
 
         Args:
-            HER (AbstractHER): hinsight experience replay
+            her (HER): hinsight experience replay
 
         """
-        self.HER = HER
+        self.her = her
         SACAgent.__init__(self, env, args, hyper_params, models, optims, target_entropy)
 
     # pylint: disable=attribute-defined-outside-init
@@ -67,7 +67,6 @@ def _initialize(self):
 
         # HER
        if self.hyper_params["USE_HER"]:
-            self.her = self.HER()
             if self.hyper_params["DESIRED_STATES_FROM_DEMO"]:
                 self.her.fetch_desired_states_from_demo(demo)
 
diff --git a/algorithms/ddpg/agent.py b/algorithms/ddpg/agent.py
index 8b774c6a..e00eed3f 100644
--- a/algorithms/ddpg/agent.py
+++ b/algorithms/ddpg/agent.py
@@ -16,7 +16,7 @@
 import torch.nn.functional as F
 import wandb
 
-from algorithms.common.abstract.agent import Agent as AbstractAgent
+from algorithms.common.abstract.agent import Agent
 from algorithms.common.buffer.replay_buffer import ReplayBuffer
 import algorithms.common.helper_functions as common_utils
 from algorithms.common.noise import OUNoise
@@ -24,7 +24,7 @@
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(AbstractAgent):
+class DDPGAgent(Agent):
     """ActorCritic interacting with environment.
 
     Attributes:
@@ -64,7 +64,7 @@ def __init__(
             noise (OUNoise): random noise for exploration
 
         """
-        AbstractAgent.__init__(self, env, args)
+        Agent.__init__(self, env, args)
 
         self.actor, self.actor_target, self.critic, self.critic_target = models
         self.actor_optimizer, self.critic_optimizer = optims
@@ -196,7 +196,7 @@ def save_params(self, n_episode: int):
             "critic_optim_state_dict": self.critic_optimizer.state_dict(),
         }
 
-        AbstractAgent.save_params(self, params, n_episode)
+        Agent.save_params(self, params, n_episode)
 
     def write_log(self, i: int, loss: np.ndarray, score: int):
         """Write log about loss and score"""
diff --git a/algorithms/dqn/agent.py b/algorithms/dqn/agent.py
index b6775322..dd81c76b 100644
--- a/algorithms/dqn/agent.py
+++ b/algorithms/dqn/agent.py
@@ -22,7 +22,7 @@
 from torch.nn.utils import clip_grad_norm_
 import wandb
 
-from algorithms.common.abstract.agent import Agent as AbstractAgent
+from algorithms.common.abstract.agent import Agent
 from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBuffer
 from algorithms.common.buffer.replay_buffer import NStepTransitionBuffer
 import algorithms.common.helper_functions as common_utils
@@ -31,7 +31,7 @@
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(AbstractAgent):
+class DQNAgent(Agent):
     """DQN interacting with environment.
 
     Attribute:
@@ -69,7 +69,7 @@ def __init__(
             optim (torch.optim.Adam): optimizers for dqn
 
         """
-        AbstractAgent.__init__(self, env, args)
+        Agent.__init__(self, env, args)
 
         self.use_n_step = hyper_params["N_STEP"] > 1
         self.epsilon = hyper_params["MAX_EPSILON"]
@@ -261,7 +261,7 @@ def save_params(self, n_episode: int):
             "dqn_optim_state_dict": self.dqn_optimizer.state_dict(),
         }
 
-        AbstractAgent.save_params(self, params, n_episode)
+        Agent.save_params(self, params, n_episode)
 
     def write_log(self, i: int, loss: np.ndarray, score: float):
         """Write log about loss and score"""
diff --git a/algorithms/fd/ddpg_agent.py b/algorithms/fd/ddpg_agent.py
index 39e058ce..cd690dba 100644
--- a/algorithms/fd/ddpg_agent.py
+++ b/algorithms/fd/ddpg_agent.py
@@ -17,12 +17,12 @@
 from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBufferfD
 from algorithms.common.buffer.replay_buffer import NStepTransitionBuffer
 import algorithms.common.helper_functions as common_utils
-from algorithms.ddpg.agent import Agent as DDPGAgent
+from algorithms.ddpg.agent import DDPGAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(DDPGAgent):
+class DDPGfDAgent(DDPGAgent):
     """ActorCritic interacting with environment.
 
     Attributes:
diff --git a/algorithms/fd/dqn_agent.py b/algorithms/fd/dqn_agent.py
index dde5170d..af305621 100644
--- a/algorithms/fd/dqn_agent.py
+++ b/algorithms/fd/dqn_agent.py
@@ -22,12 +22,12 @@
 from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBufferfD
 from algorithms.common.buffer.replay_buffer import NStepTransitionBuffer
 import algorithms.common.helper_functions as common_utils
-from algorithms.dqn.agent import Agent as DQNAgent
+from algorithms.dqn.agent import DQNAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(DQNAgent):
+class DQNfDAgent(DQNAgent):
     """DQN interacting with environment.
 
     Attribute:
diff --git a/algorithms/fd/sac_agent.py b/algorithms/fd/sac_agent.py
index 347b289b..30178c76 100644
--- a/algorithms/fd/sac_agent.py
+++ b/algorithms/fd/sac_agent.py
@@ -18,12 +18,12 @@
 from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBufferfD
 from algorithms.common.buffer.replay_buffer import NStepTransitionBuffer
 import algorithms.common.helper_functions as common_utils
-from algorithms.sac.agent import Agent as SACAgent
+from algorithms.sac.agent import SACAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(SACAgent):
+class SACfDAgent(SACAgent):
     """SAC agent interacting with environment.
 
     Attrtibutes:
diff --git a/algorithms/per/ddpg_agent.py b/algorithms/per/ddpg_agent.py
index edf21754..3821fd6d 100644
--- a/algorithms/per/ddpg_agent.py
+++ b/algorithms/per/ddpg_agent.py
@@ -13,12 +13,12 @@
 
 from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBuffer
 import algorithms.common.helper_functions as common_utils
-from algorithms.ddpg.agent import Agent as DDPGAgent
+from algorithms.ddpg.agent import DDPGAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(DDPGAgent):
+class DDPGPERAgent(DDPGAgent):
     """ActorCritic interacting with environment.
 
     Attributes:
diff --git a/algorithms/ppo/agent.py b/algorithms/ppo/agent.py
index dfd7baf6..ddbe6393 100644
--- a/algorithms/ppo/agent.py
+++ b/algorithms/ppo/agent.py
@@ -16,14 +16,14 @@
 import torch.nn as nn
 import wandb
 
-from algorithms.common.abstract.agent import Agent as AbstractAgent
+from algorithms.common.abstract.agent import Agent
 from algorithms.common.env.multiprocessing_env import SubprocVecEnv
 import algorithms.ppo.utils as ppo_utils
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(AbstractAgent):
+class PPOAgent(Agent):
     """PPO Agent.
 
     Attributes:
@@ -64,7 +64,7 @@ def __init__(
             optims (tuple): optimizers for actor and critic
 
         """
-        AbstractAgent.__init__(self, env_single, args)
+        Agent.__init__(self, env_single, args)
 
         if not self.args.test:
             self.env = env_multi
@@ -251,7 +251,7 @@ def save_params(self, n_episode: int):
             "actor_optim_state_dict": self.actor_optimizer.state_dict(),
             "critic_optim_state_dict": self.critic_optimizer.state_dict(),
         }
-        AbstractAgent.save_params(self, params, n_episode)
+        Agent.save_params(self, params, n_episode)
 
     def write_log(
         self,
diff --git a/algorithms/sac/agent.py b/algorithms/sac/agent.py
index 8eb56ed1..0791ca03 100644
--- a/algorithms/sac/agent.py
+++ b/algorithms/sac/agent.py
@@ -18,14 +18,14 @@
 import torch.optim as optim
 import wandb
 
-from algorithms.common.abstract.agent import Agent as AbstractAgent
+from algorithms.common.abstract.agent import Agent
 from algorithms.common.buffer.replay_buffer import ReplayBuffer
 import algorithms.common.helper_functions as common_utils
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(AbstractAgent):
+class SACAgent(Agent):
     """SAC agent interacting with environment.
 
     Attrtibutes:
@@ -71,7 +71,7 @@ def __init__(
             target_entropy (float): target entropy for the inequality constraint
 
         """
-        AbstractAgent.__init__(self, env, args)
+        Agent.__init__(self, env, args)
 
         self.actor, self.vf, self.vf_target, self.qf_1, self.qf_2 = models
         self.actor_optimizer, self.vf_optimizer = optims[0:2]
@@ -277,7 +277,7 @@ def save_params(self, n_episode: int):
         if self.hyper_params["AUTO_ENTROPY_TUNING"]:
             params["alpha_optim"] = self.alpha_optimizer.state_dict()
 
-        AbstractAgent.save_params(self, params, n_episode)
+        Agent.save_params(self, params, n_episode)
 
     def write_log(
         self, i: int, loss: np.ndarray, score: float = 0.0, delayed_update: int = 1
diff --git a/algorithms/td3/agent.py b/algorithms/td3/agent.py
index b9f0d41c..63ecefc8 100644
--- a/algorithms/td3/agent.py
+++ b/algorithms/td3/agent.py
@@ -16,7 +16,7 @@
 import torch.nn.functional as F
 import wandb
 
-from algorithms.common.abstract.agent import Agent as AbstractAgent
+from algorithms.common.abstract.agent import Agent
 from algorithms.common.buffer.replay_buffer import ReplayBuffer
 import algorithms.common.helper_functions as common_utils
 from algorithms.common.noise import GaussianNoise
@@ -24,7 +24,7 @@
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
-class Agent(AbstractAgent):
+class TD3Agent(Agent):
     """ActorCritic interacting with environment.
 
     Attributes:
@@ -66,7 +66,7 @@ def __init__(
             noise (GaussianNoise): random noise for exploration
 
         """
-        AbstractAgent.__init__(self, env, args)
+        Agent.__init__(self, env, args)
 
         self.actor, self.actor_target = models[0:2]
         self.critic_1, self.critic_2 = models[2:4]
@@ -225,7 +225,7 @@ def save_params(self, n_episode: int):
             "critic_optim": self.critic_optimizer.state_dict(),
         }
 
-        AbstractAgent.save_params(self, params, n_episode)
+        Agent.save_params(self, params, n_episode)
 
     def write_log(
         self, i: int, loss: np.ndarray, score: float = 0.0, delayed_update: int = 1
diff --git a/examples/lunarlander_continuous_v2/a2c.py b/examples/lunarlander_continuous_v2/a2c.py
index cb963f62..7173eb60 100644
--- a/examples/lunarlander_continuous_v2/a2c.py
+++ b/examples/lunarlander_continuous_v2/a2c.py
@@ -11,7 +11,7 @@
 import torch
 import torch.optim as optim
 
-from algorithms.a2c.agent import Agent
+from algorithms.a2c.agent import A2CAgent
 from algorithms.common.networks.mlp import MLP, GaussianDist
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -65,7 +65,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, critic_optim)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims)
+    agent = A2CAgent(env, args, hyper_params, models, optims)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_continuous_v2/bc-ddpg.py b/examples/lunarlander_continuous_v2/bc-ddpg.py
index 0c266dc0..1a6e7b88 100644
--- a/examples/lunarlander_continuous_v2/bc-ddpg.py
+++ b/examples/lunarlander_continuous_v2/bc-ddpg.py
@@ -13,7 +13,7 @@
 import torch
 import torch.optim as optim
 
-from algorithms.bc.ddpg_agent import Agent
+from algorithms.bc.ddpg_agent import BCDDPGAgent
 from algorithms.common.networks.mlp import MLP
 from algorithms.common.noise import OUNoise
 from examples.lunarlander_continuous_v2.utils import LunarLanderContinuousHER
@@ -114,10 +114,10 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, critic_optim)
 
     # HER
-    HER = LunarLanderContinuousHER if hyper_params["USE_HER"] else None
+    her = LunarLanderContinuousHER() if hyper_params["USE_HER"] else None
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, noise, HER)
+    agent = BCDDPGAgent(env, args, hyper_params, models, optims, noise, her)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_continuous_v2/bc-sac.py b/examples/lunarlander_continuous_v2/bc-sac.py
index a2fbac7b..e3258b58 100644
--- a/examples/lunarlander_continuous_v2/bc-sac.py
+++ b/examples/lunarlander_continuous_v2/bc-sac.py
@@ -12,7 +12,7 @@
 import torch
 import torch.optim as optim
 
-from algorithms.bc.sac_agent import Agent
+from algorithms.bc.sac_agent import BCSACAgent
 from algorithms.common.networks.mlp import MLP, FlattenMLP, TanhGaussianDistParams
 from examples.lunarlander_continuous_v2.utils import LunarLanderContinuousHER
 
@@ -117,10 +117,10 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)
 
     # HER
-    HER = LunarLanderContinuousHER if hyper_params["USE_HER"] else None
+    her = LunarLanderContinuousHER() if hyper_params["USE_HER"] else None
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, target_entropy, HER)
+    agent = BCSACAgent(env, args, hyper_params, models, optims, target_entropy, her)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_continuous_v2/ddpg.py b/examples/lunarlander_continuous_v2/ddpg.py
index 5da373f4..0db10528 100644
--- a/examples/lunarlander_continuous_v2/ddpg.py
+++ b/examples/lunarlander_continuous_v2/ddpg.py
@@ -13,7 +13,7 @@
 from algorithms.common.networks.mlp import MLP
 from algorithms.common.noise import OUNoise
-from algorithms.ddpg.agent import Agent
+from algorithms.ddpg.agent import DDPGAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -101,7 +101,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, critic_optim)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, noise)
+    agent = DDPGAgent(env, args, hyper_params, models, optims, noise)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_continuous_v2/ddpgfd.py b/examples/lunarlander_continuous_v2/ddpgfd.py
index c4f6c4b3..d5a98f04 100644
--- a/examples/lunarlander_continuous_v2/ddpgfd.py
+++ b/examples/lunarlander_continuous_v2/ddpgfd.py
@@ -13,7 +13,7 @@
 from algorithms.common.networks.mlp import MLP
 from algorithms.common.noise import OUNoise
-from algorithms.fd.ddpg_agent import Agent
+from algorithms.fd.ddpg_agent import DDPGfDAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -109,7 +109,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, critic_optim)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, noise)
+    agent = DDPGfDAgent(env, args, hyper_params, models, optims, noise)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_continuous_v2/per-ddpg.py b/examples/lunarlander_continuous_v2/per-ddpg.py
index fc038c65..33769669 100644
--- a/examples/lunarlander_continuous_v2/per-ddpg.py
+++ b/examples/lunarlander_continuous_v2/per-ddpg.py
@@ -13,7 +13,7 @@
 from algorithms.common.networks.mlp import MLP
 from algorithms.common.noise import OUNoise
-from algorithms.per.ddpg_agent import Agent
+from algorithms.per.ddpg_agent import DDPGPERAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -104,7 +104,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, critic_optim)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, noise)
+    agent = DDPGPERAgent(env, args, hyper_params, models, optims, noise)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_continuous_v2/ppo.py b/examples/lunarlander_continuous_v2/ppo.py
index 26b4aac4..dafea19e 100644
--- a/examples/lunarlander_continuous_v2/ppo.py
+++ b/examples/lunarlander_continuous_v2/ppo.py
@@ -14,7 +14,7 @@
 from algorithms.common.env.utils import env_generator, make_envs
 from algorithms.common.networks.mlp import MLP, GaussianDist
-from algorithms.ppo.agent import Agent
+from algorithms.ppo.agent import PPOAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -92,7 +92,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, critic_optim)
 
     # create an agent
-    agent = Agent(env_single, env_multi, args, hyper_params, models, optims)
+    agent = PPOAgent(env_single, env_multi, args, hyper_params, models, optims)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_continuous_v2/sac.py b/examples/lunarlander_continuous_v2/sac.py
index ae5a4cfa..344d6c9a 100644
--- a/examples/lunarlander_continuous_v2/sac.py
+++ b/examples/lunarlander_continuous_v2/sac.py
@@ -13,7 +13,7 @@
 import torch.optim as optim
 
 from algorithms.common.networks.mlp import MLP, FlattenMLP, TanhGaussianDistParams
-from algorithms.sac.agent import Agent
+from algorithms.sac.agent import SACAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -106,7 +106,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, target_entropy)
+    agent = SACAgent(env, args, hyper_params, models, optims, target_entropy)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_continuous_v2/sacfd.py b/examples/lunarlander_continuous_v2/sacfd.py
index 82fdbe4a..665c9407 100644
--- a/examples/lunarlander_continuous_v2/sacfd.py
+++ b/examples/lunarlander_continuous_v2/sacfd.py
@@ -13,7 +13,7 @@
 import torch.optim as optim
 
 from algorithms.common.networks.mlp import MLP, FlattenMLP, TanhGaussianDistParams
-from algorithms.fd.sac_agent import Agent
+from algorithms.fd.sac_agent import SACfDAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -112,7 +112,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, target_entropy)
+    agent = SACfDAgent(env, args, hyper_params, models, optims, target_entropy)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_continuous_v2/td3.py b/examples/lunarlander_continuous_v2/td3.py
index 9015c6bf..b73a05c5 100644
--- a/examples/lunarlander_continuous_v2/td3.py
+++ b/examples/lunarlander_continuous_v2/td3.py
@@ -13,7 +13,7 @@
 from algorithms.common.networks.mlp import MLP
 from algorithms.common.noise import GaussianNoise
-from algorithms.td3.agent import Agent
+from algorithms.td3.agent import TD3Agent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -115,7 +115,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, critic_optim)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, noise)
+    agent = TD3Agent(env, args, hyper_params, models, optims, noise)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_continuous_v2/utils.py b/examples/lunarlander_continuous_v2/utils.py
index 0ab7fddd..7e8a5007 100644
--- a/examples/lunarlander_continuous_v2/utils.py
+++ b/examples/lunarlander_continuous_v2/utils.py
@@ -7,7 +7,7 @@
 import numpy as np
 
-from algorithms.common.abstract.her import HER as AbstractHER
+from algorithms.common.abstract.her import HER
 from algorithms.common.abstract.reward_fn import RewardFn
 
 
@@ -22,7 +22,7 @@ def __call__(self, transition: tuple, goal_state: np.ndarray) -> np.float64:
         return np.float64(-1.0)
 
 
-class LunarLanderContinuousHER(AbstractHER):
+class LunarLanderContinuousHER(HER):
     """HER for LunarLanderContinuous-v2 environment.
 
     Attributes:
@@ -33,7 +33,7 @@ class LunarLanderContinuousHER(AbstractHER):
 
     def __init__(self, reward_func: RewardFn = L1DistanceRewardFn):
         """Initialization."""
-        AbstractHER.__init__(self, reward_func=reward_func)
+        HER.__init__(self, reward_func=reward_func)
 
     # pylint: disable=attribute-defined-outside-init
     def fetch_desired_states_from_demo(self, demo: list):
diff --git a/examples/lunarlander_v2/dqfd.py b/examples/lunarlander_v2/dqfd.py
index 2c9155dc..3b373c96 100644
--- a/examples/lunarlander_v2/dqfd.py
+++ b/examples/lunarlander_v2/dqfd.py
@@ -12,7 +12,7 @@
 import torch.optim as optim
 
 from algorithms.dqn.networks import C51DuelingMLP
-from algorithms.fd.dqn_agent import Agent
+from algorithms.fd.dqn_agent import DQNfDAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -89,7 +89,7 @@ def get_fc_model():
     models = (dqn, dqn_target)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, dqn_optim)
+    agent = DQNfDAgent(env, args, hyper_params, models, dqn_optim)
 
     # run
     if args.test:
diff --git a/examples/lunarlander_v2/dqn.py b/examples/lunarlander_v2/dqn.py
index 54a24de7..c824dfaf 100644
--- a/examples/lunarlander_v2/dqn.py
+++ b/examples/lunarlander_v2/dqn.py
@@ -11,7 +11,7 @@
 import torch
 import torch.optim as optim
 
-from algorithms.dqn.agent import Agent
+from algorithms.dqn.agent import DQNAgent
 from algorithms.dqn.networks import C51DuelingMLP
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -85,7 +85,7 @@ def get_fc_model():
     models = (dqn, dqn_target)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, dqn_optim)
+    agent = DQNAgent(env, args, hyper_params, models, dqn_optim)
 
     # run
     if args.test:
diff --git a/examples/pong_no_frameskip_v4/dqn.py b/examples/pong_no_frameskip_v4/dqn.py
index 08d81da0..074b987e 100644
--- a/examples/pong_no_frameskip_v4/dqn.py
+++ b/examples/pong_no_frameskip_v4/dqn.py
@@ -12,7 +12,7 @@
 import torch.optim as optim
 
 from algorithms.common.networks.cnn import CNNLayer
-from algorithms.dqn.agent import Agent
+from algorithms.dqn.agent import DQNAgent
 from algorithms.dqn.networks import IQNCNN, IQNMLP
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -101,7 +101,7 @@ def get_cnn_model():
     models = (dqn, dqn_target)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, dqn_optim)
+    agent = DQNAgent(env, args, hyper_params, models, dqn_optim)
     agent.env_name = env_name
 
     # run
diff --git a/examples/reacher_v2/bc-ddpg.py b/examples/reacher_v2/bc-ddpg.py
index 1a8a78aa..1e11e88a 100644
--- a/examples/reacher_v2/bc-ddpg.py
+++ b/examples/reacher_v2/bc-ddpg.py
@@ -12,7 +12,7 @@
 import torch
 import torch.optim as optim
 
-from algorithms.bc.ddpg_agent import Agent
+from algorithms.bc.ddpg_agent import BCDDPGAgent
 from algorithms.common.networks.mlp import MLP
 from algorithms.common.noise import OUNoise
 from examples.reacher_v2.utils import ReacherHER
@@ -36,7 +36,7 @@
     "INITIAL_RANDOM_ACTION": 10000,
    "MULTIPLE_LEARN": 1,
     # HER
-    "USE_HER": True,
+    "USE_HER": False,
     "SUCCESS_SCORE": -5.0,
     "DESIRED_STATES_FROM_DEMO": False,
 }
@@ -110,10 +110,10 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, critic_optim)
 
     # HER
-    HER = ReacherHER if hyper_params["USE_HER"] else None
+    her = ReacherHER() if hyper_params["USE_HER"] else None
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, noise, HER)
+    agent = BCDDPGAgent(env, args, hyper_params, models, optims, noise, her)
 
     # run
     if args.test:
diff --git a/examples/reacher_v2/bc-sac.py b/examples/reacher_v2/bc-sac.py
index 2d1558a6..5773eb95 100644
--- a/examples/reacher_v2/bc-sac.py
+++ b/examples/reacher_v2/bc-sac.py
@@ -12,7 +12,7 @@
 import torch
 import torch.optim as optim
 
-from algorithms.bc.sac_agent import Agent
+from algorithms.bc.sac_agent import BCSACAgent
 from algorithms.common.networks.mlp import MLP, FlattenMLP, TanhGaussianDistParams
 from examples.reacher_v2.utils import ReacherHER
 
@@ -114,10 +114,10 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)
 
     # HER
-    HER = ReacherHER if hyper_params["USE_HER"] else None
+    her = ReacherHER() if hyper_params["USE_HER"] else None
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, target_entropy, HER)
+    agent = BCSACAgent(env, args, hyper_params, models, optims, target_entropy, her)
 
     # run
     if args.test:
diff --git a/examples/reacher_v2/ddpg.py b/examples/reacher_v2/ddpg.py
index d625c8c5..2a066f80 100644
--- a/examples/reacher_v2/ddpg.py
+++ b/examples/reacher_v2/ddpg.py
@@ -13,7 +13,7 @@
 from algorithms.common.networks.mlp import MLP
 from algorithms.common.noise import OUNoise
-from algorithms.ddpg.agent import Agent
+from algorithms.ddpg.agent import DDPGAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -101,7 +101,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, critic_optim)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, noise)
+    agent = DDPGAgent(env, args, hyper_params, models, optims, noise)
 
     # run
     if args.test:
diff --git a/examples/reacher_v2/sac.py b/examples/reacher_v2/sac.py
index 1f37124f..3f2607d2 100644
--- a/examples/reacher_v2/sac.py
+++ b/examples/reacher_v2/sac.py
@@ -13,7 +13,7 @@
 import torch.optim as optim
 
 from algorithms.common.networks.mlp import MLP, FlattenMLP, TanhGaussianDistParams
-from algorithms.sac.agent import Agent
+from algorithms.sac.agent import SACAgent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -106,7 +106,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, target_entropy)
+    agent = SACAgent(env, args, hyper_params, models, optims, target_entropy)
 
     # run
     if args.test:
diff --git a/examples/reacher_v2/td3.py b/examples/reacher_v2/td3.py
index c553a649..c488afad 100644
--- a/examples/reacher_v2/td3.py
+++ b/examples/reacher_v2/td3.py
@@ -13,7 +13,7 @@
 from algorithms.common.networks.mlp import MLP
 from algorithms.common.noise import GaussianNoise
-from algorithms.td3.agent import Agent
+from algorithms.td3.agent import TD3Agent
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
@@ -115,7 +115,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int)
     optims = (actor_optim, critic_optim)
 
     # create an agent
-    agent = Agent(env, args, hyper_params, models, optims, noise)
+    agent = TD3Agent(env, args, hyper_params, models, optims, noise)
 
     # run
     if args.test:
diff --git a/examples/reacher_v2/utils.py b/examples/reacher_v2/utils.py
index ec228599..d2c73ba7 100644
--- a/examples/reacher_v2/utils.py
+++ b/examples/reacher_v2/utils.py
@@ -7,7 +7,7 @@
 import numpy as np
 
-from algorithms.common.abstract.her import HER as AbstractHER
+from algorithms.common.abstract.her import HER
 from algorithms.common.abstract.reward_fn import RewardFn
 
 
@@ -22,12 +22,12 @@ def __call__(self, transition: tuple, _) -> np.float64:
         return reward_dist + reward_ctrl
 
 
-class ReacherHER(AbstractHER):
+class ReacherHER(HER):
     """HER for Reacher-v2 environment."""
 
     def __init__(self, reward_func: RewardFn = ReacherRewardFn):
         """Initialization."""
-        AbstractHER.__init__(self, reward_func=reward_func)
+        HER.__init__(self, reward_func=reward_func)
 
     def fetch_desired_states_from_demo(self, _: list):
         """Return desired goal states from demonstration data.
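
Note (editor's sketch, not part of the diff): the snippet below illustrates how the two changes above combine in caller code, i.e. importing agents by their new unique class names and passing a HER instance instead of the HER class, following examples/lunarlander_continuous_v2/bc-ddpg.py. It assumes env, args, hyper_params, models, optims, and noise have already been built as in that script; the trailing train()/test() calls are assumed entry points and are included only to round out the example.

    # Before this diff: the generic class name was imported and the HER class
    # itself was handed to the agent, which instantiated it in _initialize().
    #
    #   from algorithms.bc.ddpg_agent import Agent
    #   HER = LunarLanderContinuousHER if hyper_params["USE_HER"] else None
    #   agent = Agent(env, args, hyper_params, models, optims, noise, HER)

    # After this diff: each agent has a unique class name, and HER is passed
    # as an already-constructed instance (the "self.her = self.HER()" line in
    # _initialize() is removed).
    from algorithms.bc.ddpg_agent import BCDDPGAgent
    from examples.lunarlander_continuous_v2.utils import LunarLanderContinuousHER

    her = LunarLanderContinuousHER() if hyper_params["USE_HER"] else None
    agent = BCDDPGAgent(env, args, hyper_params, models, optims, noise, her)

    if args.test:
        agent.test()   # assumed entry point, mirroring the example run() functions
    else:
        agent.train()  # assumed entry point, mirroring the example run() functions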