From 96a08e487d10bef9698c56447de3866339a8cf0d Mon Sep 17 00:00:00 2001 From: Chaehyeuk Date: Mon, 8 Apr 2019 17:19:51 +0900 Subject: [PATCH 1/5] Remove abstract --- algorithms/a2c/agent.py | 2 +- algorithms/bc/ddpg_agent.py | 2 +- algorithms/bc/sac_agent.py | 2 +- algorithms/common/abstract/agent.py | 2 +- algorithms/common/abstract/her.py | 6 +++--- algorithms/common/abstract/reward_fn.py | 2 +- algorithms/ddpg/agent.py | 2 +- algorithms/dqn/agent.py | 2 +- algorithms/ppo/agent.py | 2 +- algorithms/sac/agent.py | 2 +- algorithms/td3/agent.py | 2 +- examples/lunarlander_continuous_v2/utils.py | 8 ++++---- examples/reacher_v2/utils.py | 8 ++++---- 13 files changed, 21 insertions(+), 21 deletions(-) diff --git a/algorithms/a2c/agent.py b/algorithms/a2c/agent.py index 2a65897d..46a93e6b 100644 --- a/algorithms/a2c/agent.py +++ b/algorithms/a2c/agent.py @@ -16,7 +16,7 @@ import torch.nn.functional as F import wandb -from algorithms.common.abstract.agent import AbstractAgent +from algorithms.common.abstract.agent import Agent as AbstractAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") diff --git a/algorithms/bc/ddpg_agent.py b/algorithms/bc/ddpg_agent.py index af7ecc8c..7c3e69e8 100644 --- a/algorithms/bc/ddpg_agent.py +++ b/algorithms/bc/ddpg_agent.py @@ -16,7 +16,7 @@ import torch.nn.functional as F import wandb -from algorithms.common.abstract.her import AbstractHER +from algorithms.common.abstract.her import HER as AbstractHER from algorithms.common.buffer.replay_buffer import ReplayBuffer import algorithms.common.helper_functions as common_utils from algorithms.common.noise import OUNoise diff --git a/algorithms/bc/sac_agent.py b/algorithms/bc/sac_agent.py index 3608389e..1d1d4e42 100644 --- a/algorithms/bc/sac_agent.py +++ b/algorithms/bc/sac_agent.py @@ -18,7 +18,7 @@ import torch.nn.functional as F import wandb -from algorithms.common.abstract.her import AbstractHER +from algorithms.common.abstract.her import HER as AbstractHER from algorithms.common.buffer.replay_buffer import ReplayBuffer import algorithms.common.helper_functions as common_utils from algorithms.sac.agent import Agent as SACAgent diff --git a/algorithms/common/abstract/agent.py b/algorithms/common/abstract/agent.py index 769f627f..1d15f432 100644 --- a/algorithms/common/abstract/agent.py +++ b/algorithms/common/abstract/agent.py @@ -18,7 +18,7 @@ import wandb -class AbstractAgent(ABC): +class Agent(ABC): """Abstract Agent used for all agents. Attributes: diff --git a/algorithms/common/abstract/her.py b/algorithms/common/abstract/her.py index 290e019e..5fad2a75 100644 --- a/algorithms/common/abstract/her.py +++ b/algorithms/common/abstract/her.py @@ -11,10 +11,10 @@ import numpy as np -from algorithms.common.abstract.reward_fn import AbstractRewardFn +from algorithms.common.abstract.reward_fn import RewardFn -class AbstractHER(ABC): +class HER(ABC): """Abstract class for HER (final strategy). Attributes: @@ -22,7 +22,7 @@ class AbstractHER(ABC): """ - def __init__(self, reward_func: AbstractRewardFn): + def __init__(self, reward_func: RewardFn): """Initialization. Args: diff --git a/algorithms/common/abstract/reward_fn.py b/algorithms/common/abstract/reward_fn.py index b3a28bf4..a64134c7 100644 --- a/algorithms/common/abstract/reward_fn.py +++ b/algorithms/common/abstract/reward_fn.py @@ -9,7 +9,7 @@ import numpy as np -class AbstractRewardFn(ABC): +class RewardFn(ABC): """Abstract class for computing reward. 
New compute_reward class should redefine __call__() diff --git a/algorithms/ddpg/agent.py b/algorithms/ddpg/agent.py index 7cfea3b0..8b774c6a 100644 --- a/algorithms/ddpg/agent.py +++ b/algorithms/ddpg/agent.py @@ -16,7 +16,7 @@ import torch.nn.functional as F import wandb -from algorithms.common.abstract.agent import AbstractAgent +from algorithms.common.abstract.agent import Agent as AbstractAgent from algorithms.common.buffer.replay_buffer import ReplayBuffer import algorithms.common.helper_functions as common_utils from algorithms.common.noise import OUNoise diff --git a/algorithms/dqn/agent.py b/algorithms/dqn/agent.py index cbbd276b..b6775322 100644 --- a/algorithms/dqn/agent.py +++ b/algorithms/dqn/agent.py @@ -22,7 +22,7 @@ from torch.nn.utils import clip_grad_norm_ import wandb -from algorithms.common.abstract.agent import AbstractAgent +from algorithms.common.abstract.agent import Agent as AbstractAgent from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBuffer from algorithms.common.buffer.replay_buffer import NStepTransitionBuffer import algorithms.common.helper_functions as common_utils diff --git a/algorithms/ppo/agent.py b/algorithms/ppo/agent.py index aa0388ea..dfd7baf6 100644 --- a/algorithms/ppo/agent.py +++ b/algorithms/ppo/agent.py @@ -16,7 +16,7 @@ import torch.nn as nn import wandb -from algorithms.common.abstract.agent import AbstractAgent +from algorithms.common.abstract.agent import Agent as AbstractAgent from algorithms.common.env.multiprocessing_env import SubprocVecEnv import algorithms.ppo.utils as ppo_utils diff --git a/algorithms/sac/agent.py b/algorithms/sac/agent.py index 995cbb6e..8eb56ed1 100644 --- a/algorithms/sac/agent.py +++ b/algorithms/sac/agent.py @@ -18,7 +18,7 @@ import torch.optim as optim import wandb -from algorithms.common.abstract.agent import AbstractAgent +from algorithms.common.abstract.agent import Agent as AbstractAgent from algorithms.common.buffer.replay_buffer import ReplayBuffer import algorithms.common.helper_functions as common_utils diff --git a/algorithms/td3/agent.py b/algorithms/td3/agent.py index c32d5d59..b9f0d41c 100644 --- a/algorithms/td3/agent.py +++ b/algorithms/td3/agent.py @@ -16,7 +16,7 @@ import torch.nn.functional as F import wandb -from algorithms.common.abstract.agent import AbstractAgent +from algorithms.common.abstract.agent import Agent as AbstractAgent from algorithms.common.buffer.replay_buffer import ReplayBuffer import algorithms.common.helper_functions as common_utils from algorithms.common.noise import GaussianNoise diff --git a/examples/lunarlander_continuous_v2/utils.py b/examples/lunarlander_continuous_v2/utils.py index 0fe8b782..0ab7fddd 100644 --- a/examples/lunarlander_continuous_v2/utils.py +++ b/examples/lunarlander_continuous_v2/utils.py @@ -7,11 +7,11 @@ import numpy as np -from algorithms.common.abstract.her import AbstractHER -from algorithms.common.abstract.reward_fn import AbstractRewardFn +from algorithms.common.abstract.her import HER as AbstractHER +from algorithms.common.abstract.reward_fn import RewardFn -class L1DistanceRewardFn(AbstractRewardFn): +class L1DistanceRewardFn(RewardFn): def __call__(self, transition: tuple, goal_state: np.ndarray) -> np.float64: """L1 Distance reward function.""" next_state = transition[3] @@ -31,7 +31,7 @@ class LunarLanderContinuousHER(AbstractHER): """ - def __init__(self, reward_func: AbstractRewardFn = L1DistanceRewardFn): + def __init__(self, reward_func: RewardFn = L1DistanceRewardFn): """Initialization.""" 
AbstractHER.__init__(self, reward_func=reward_func) diff --git a/examples/reacher_v2/utils.py b/examples/reacher_v2/utils.py index 8c8afce6..ec228599 100644 --- a/examples/reacher_v2/utils.py +++ b/examples/reacher_v2/utils.py @@ -7,11 +7,11 @@ import numpy as np -from algorithms.common.abstract.her import AbstractHER -from algorithms.common.abstract.reward_fn import AbstractRewardFn +from algorithms.common.abstract.her import HER as AbstractHER +from algorithms.common.abstract.reward_fn import RewardFn -class ReacherRewardFn(AbstractRewardFn): +class ReacherRewardFn(RewardFn): def __call__(self, transition: tuple, _) -> np.float64: """Reward function for Reacher-v2 environment.""" state, action = transition[0:2] @@ -25,7 +25,7 @@ def __call__(self, transition: tuple, _) -> np.float64: class ReacherHER(AbstractHER): """HER for Reacher-v2 environment.""" - def __init__(self, reward_func: AbstractRewardFn = ReacherRewardFn): + def __init__(self, reward_func: RewardFn = ReacherRewardFn): """Initialization.""" AbstractHER.__init__(self, reward_func=reward_func) From 17ef9a7ed1be1ed1e0d2e4fef372db35c9745ca3 Mon Sep 17 00:00:00 2001 From: darthegg Date: Wed, 10 Apr 2019 12:12:07 +0900 Subject: [PATCH 2/5] Remove phrase 'Abstract' from abstract class --- algorithms/a2c/agent.py | 8 ++++---- algorithms/bc/ddpg_agent.py | 15 +++++++-------- algorithms/bc/sac_agent.py | 15 +++++++-------- algorithms/ddpg/agent.py | 8 ++++---- algorithms/dqn/agent.py | 8 ++++---- algorithms/fd/ddpg_agent.py | 4 ++-- algorithms/fd/dqn_agent.py | 4 ++-- algorithms/fd/sac_agent.py | 4 ++-- algorithms/per/ddpg_agent.py | 4 ++-- algorithms/ppo/agent.py | 8 ++++---- algorithms/sac/agent.py | 8 ++++---- algorithms/td3/agent.py | 8 ++++---- examples/lunarlander_continuous_v2/a2c.py | 4 ++-- examples/lunarlander_continuous_v2/bc-ddpg.py | 6 +++--- examples/lunarlander_continuous_v2/bc-sac.py | 6 +++--- examples/lunarlander_continuous_v2/ddpg.py | 4 ++-- examples/lunarlander_continuous_v2/ddpgfd.py | 4 ++-- examples/lunarlander_continuous_v2/per-ddpg.py | 4 ++-- examples/lunarlander_continuous_v2/ppo.py | 4 ++-- examples/lunarlander_continuous_v2/sac.py | 4 ++-- examples/lunarlander_continuous_v2/sacfd.py | 4 ++-- examples/lunarlander_continuous_v2/td3.py | 4 ++-- examples/lunarlander_continuous_v2/utils.py | 6 +++--- examples/lunarlander_v2/dqfd.py | 4 ++-- examples/lunarlander_v2/dqn.py | 4 ++-- examples/pong_no_frameskip_v4/dqn.py | 4 ++-- examples/reacher_v2/bc-ddpg.py | 8 ++++---- examples/reacher_v2/bc-sac.py | 6 +++--- examples/reacher_v2/ddpg.py | 4 ++-- examples/reacher_v2/sac.py | 4 ++-- examples/reacher_v2/td3.py | 4 ++-- examples/reacher_v2/utils.py | 6 +++--- 32 files changed, 93 insertions(+), 95 deletions(-) diff --git a/algorithms/a2c/agent.py b/algorithms/a2c/agent.py index 46a93e6b..0af29f94 100644 --- a/algorithms/a2c/agent.py +++ b/algorithms/a2c/agent.py @@ -16,12 +16,12 @@ import torch.nn.functional as F import wandb -from algorithms.common.abstract.agent import Agent as AbstractAgent +from algorithms.common.abstract.agent import Agent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(AbstractAgent): +class A2CAgent(Agent): """1-Step Advantage Actor-Critic interacting with environment. 
Attributes: @@ -55,7 +55,7 @@ def __init__( optims (tuple): optimizers for actor and critic """ - AbstractAgent.__init__(self, env, args) + Agent.__init__(self, env, args) self.actor, self.critic = models self.actor_optimizer, self.critic_optimizer = optims @@ -158,7 +158,7 @@ def save_params(self, n_episode: int): "critic_optim_state_dict": self.critic_optimizer.state_dict(), } - AbstractAgent.save_params(self, params, n_episode) + Agent.save_params(self, params, n_episode) def write_log(self, i: int, score: int, policy_loss: float, value_loss: float): total_loss = policy_loss + value_loss diff --git a/algorithms/bc/ddpg_agent.py b/algorithms/bc/ddpg_agent.py index 7c3e69e8..a51cf3d8 100644 --- a/algorithms/bc/ddpg_agent.py +++ b/algorithms/bc/ddpg_agent.py @@ -16,20 +16,20 @@ import torch.nn.functional as F import wandb -from algorithms.common.abstract.her import HER as AbstractHER +from algorithms.common.abstract.her import HER from algorithms.common.buffer.replay_buffer import ReplayBuffer import algorithms.common.helper_functions as common_utils from algorithms.common.noise import OUNoise -from algorithms.ddpg.agent import Agent as DDPGAgent +from algorithms.ddpg.agent import DDPGAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(DDPGAgent): +class BCDDPGAgent(DDPGAgent): """BC with DDPG agent interacting with environment. Attributes: - HER (AbstractHER): hinsight experience replay + her (HER): hinsight experience replay transitions_epi (list): transitions per episode (for HER) desired_state (np.ndarray): desired state of current episode memory (ReplayBuffer): replay memory @@ -47,14 +47,14 @@ def __init__( models: tuple, optims: tuple, noise: OUNoise, - HER: AbstractHER, + her: HER, ): """Initialization. Args: - HER (AbstractHER): hinsight experience replay + her (HER): hinsight experience replay """ - self.HER = HER + self.her = her DDPGAgent.__init__(self, env, args, hyper_params, models, optims, noise) # pylint: disable=attribute-defined-outside-init @@ -66,7 +66,6 @@ def _initialize(self): # HER if self.hyper_params["USE_HER"]: - self.her = self.HER() if self.hyper_params["DESIRED_STATES_FROM_DEMO"]: self.her.fetch_desired_states_from_demo(demo) diff --git a/algorithms/bc/sac_agent.py b/algorithms/bc/sac_agent.py index 1d1d4e42..d77b6e04 100644 --- a/algorithms/bc/sac_agent.py +++ b/algorithms/bc/sac_agent.py @@ -18,19 +18,19 @@ import torch.nn.functional as F import wandb -from algorithms.common.abstract.her import HER as AbstractHER +from algorithms.common.abstract.her import HER from algorithms.common.buffer.replay_buffer import ReplayBuffer import algorithms.common.helper_functions as common_utils -from algorithms.sac.agent import Agent as SACAgent +from algorithms.sac.agent import SACAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(SACAgent): +class BCSACAgent(SACAgent): """BC with SAC agent interacting with environment. Attrtibutes: - HER (AbstractHER): hinsight experience replay + her (HER): hinsight experience replay transitions_epi (list): transitions per episode (for HER) desired_state (np.ndarray): desired state of current episode memory (ReplayBuffer): replay memory @@ -48,14 +48,14 @@ def __init__( models: tuple, optims: tuple, target_entropy: float, - HER: AbstractHER, + her: HER, ): """Initialization. 
Args: - HER (AbstractHER): hinsight experience replay + her (HER): hinsight experience replay """ - self.HER = HER + self.her = her SACAgent.__init__(self, env, args, hyper_params, models, optims, target_entropy) # pylint: disable=attribute-defined-outside-init @@ -67,7 +67,6 @@ def _initialize(self): # HER if self.hyper_params["USE_HER"]: - self.her = self.HER() if self.hyper_params["DESIRED_STATES_FROM_DEMO"]: self.her.fetch_desired_states_from_demo(demo) diff --git a/algorithms/ddpg/agent.py b/algorithms/ddpg/agent.py index 8b774c6a..e00eed3f 100644 --- a/algorithms/ddpg/agent.py +++ b/algorithms/ddpg/agent.py @@ -16,7 +16,7 @@ import torch.nn.functional as F import wandb -from algorithms.common.abstract.agent import Agent as AbstractAgent +from algorithms.common.abstract.agent import Agent from algorithms.common.buffer.replay_buffer import ReplayBuffer import algorithms.common.helper_functions as common_utils from algorithms.common.noise import OUNoise @@ -24,7 +24,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(AbstractAgent): +class DDPGAgent(Agent): """ActorCritic interacting with environment. Attributes: @@ -64,7 +64,7 @@ def __init__( noise (OUNoise): random noise for exploration """ - AbstractAgent.__init__(self, env, args) + Agent.__init__(self, env, args) self.actor, self.actor_target, self.critic, self.critic_target = models self.actor_optimizer, self.critic_optimizer = optims @@ -196,7 +196,7 @@ def save_params(self, n_episode: int): "critic_optim_state_dict": self.critic_optimizer.state_dict(), } - AbstractAgent.save_params(self, params, n_episode) + Agent.save_params(self, params, n_episode) def write_log(self, i: int, loss: np.ndarray, score: int): """Write log about loss and score""" diff --git a/algorithms/dqn/agent.py b/algorithms/dqn/agent.py index b6775322..dd81c76b 100644 --- a/algorithms/dqn/agent.py +++ b/algorithms/dqn/agent.py @@ -22,7 +22,7 @@ from torch.nn.utils import clip_grad_norm_ import wandb -from algorithms.common.abstract.agent import Agent as AbstractAgent +from algorithms.common.abstract.agent import Agent from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBuffer from algorithms.common.buffer.replay_buffer import NStepTransitionBuffer import algorithms.common.helper_functions as common_utils @@ -31,7 +31,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(AbstractAgent): +class DQNAgent(Agent): """DQN interacting with environment. 
Attribute: @@ -69,7 +69,7 @@ def __init__( optim (torch.optim.Adam): optimizers for dqn """ - AbstractAgent.__init__(self, env, args) + Agent.__init__(self, env, args) self.use_n_step = hyper_params["N_STEP"] > 1 self.epsilon = hyper_params["MAX_EPSILON"] @@ -261,7 +261,7 @@ def save_params(self, n_episode: int): "dqn_optim_state_dict": self.dqn_optimizer.state_dict(), } - AbstractAgent.save_params(self, params, n_episode) + Agent.save_params(self, params, n_episode) def write_log(self, i: int, loss: np.ndarray, score: float): """Write log about loss and score""" diff --git a/algorithms/fd/ddpg_agent.py b/algorithms/fd/ddpg_agent.py index 39e058ce..cd690dba 100644 --- a/algorithms/fd/ddpg_agent.py +++ b/algorithms/fd/ddpg_agent.py @@ -17,12 +17,12 @@ from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBufferfD from algorithms.common.buffer.replay_buffer import NStepTransitionBuffer import algorithms.common.helper_functions as common_utils -from algorithms.ddpg.agent import Agent as DDPGAgent +from algorithms.ddpg.agent import DDPGAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(DDPGAgent): +class DDPGfDAgent(DDPGAgent): """ActorCritic interacting with environment. Attributes: diff --git a/algorithms/fd/dqn_agent.py b/algorithms/fd/dqn_agent.py index dde5170d..af305621 100644 --- a/algorithms/fd/dqn_agent.py +++ b/algorithms/fd/dqn_agent.py @@ -22,12 +22,12 @@ from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBufferfD from algorithms.common.buffer.replay_buffer import NStepTransitionBuffer import algorithms.common.helper_functions as common_utils -from algorithms.dqn.agent import Agent as DQNAgent +from algorithms.dqn.agent import DQNAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(DQNAgent): +class DQNfDAgent(DQNAgent): """DQN interacting with environment. Attribute: diff --git a/algorithms/fd/sac_agent.py b/algorithms/fd/sac_agent.py index 347b289b..30178c76 100644 --- a/algorithms/fd/sac_agent.py +++ b/algorithms/fd/sac_agent.py @@ -18,12 +18,12 @@ from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBufferfD from algorithms.common.buffer.replay_buffer import NStepTransitionBuffer import algorithms.common.helper_functions as common_utils -from algorithms.sac.agent import Agent as SACAgent +from algorithms.sac.agent import SACAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(SACAgent): +class SACfDAgent(SACAgent): """SAC agent interacting with environment. Attrtibutes: diff --git a/algorithms/per/ddpg_agent.py b/algorithms/per/ddpg_agent.py index edf21754..3821fd6d 100644 --- a/algorithms/per/ddpg_agent.py +++ b/algorithms/per/ddpg_agent.py @@ -13,12 +13,12 @@ from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBuffer import algorithms.common.helper_functions as common_utils -from algorithms.ddpg.agent import Agent as DDPGAgent +from algorithms.ddpg.agent import DDPGAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(DDPGAgent): +class DDPGPERAgent(DDPGAgent): """ActorCritic interacting with environment. 
Attributes: diff --git a/algorithms/ppo/agent.py b/algorithms/ppo/agent.py index dfd7baf6..ddbe6393 100644 --- a/algorithms/ppo/agent.py +++ b/algorithms/ppo/agent.py @@ -16,14 +16,14 @@ import torch.nn as nn import wandb -from algorithms.common.abstract.agent import Agent as AbstractAgent +from algorithms.common.abstract.agent import Agent from algorithms.common.env.multiprocessing_env import SubprocVecEnv import algorithms.ppo.utils as ppo_utils device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(AbstractAgent): +class PPOAgent(Agent): """PPO Agent. Attributes: @@ -64,7 +64,7 @@ def __init__( optims (tuple): optimizers for actor and critic """ - AbstractAgent.__init__(self, env_single, args) + Agent.__init__(self, env_single, args) if not self.args.test: self.env = env_multi @@ -251,7 +251,7 @@ def save_params(self, n_episode: int): "actor_optim_state_dict": self.actor_optimizer.state_dict(), "critic_optim_state_dict": self.critic_optimizer.state_dict(), } - AbstractAgent.save_params(self, params, n_episode) + Agent.save_params(self, params, n_episode) def write_log( self, diff --git a/algorithms/sac/agent.py b/algorithms/sac/agent.py index 8eb56ed1..0791ca03 100644 --- a/algorithms/sac/agent.py +++ b/algorithms/sac/agent.py @@ -18,14 +18,14 @@ import torch.optim as optim import wandb -from algorithms.common.abstract.agent import Agent as AbstractAgent +from algorithms.common.abstract.agent import Agent from algorithms.common.buffer.replay_buffer import ReplayBuffer import algorithms.common.helper_functions as common_utils device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(AbstractAgent): +class SACAgent(Agent): """SAC agent interacting with environment. Attrtibutes: @@ -71,7 +71,7 @@ def __init__( target_entropy (float): target entropy for the inequality constraint """ - AbstractAgent.__init__(self, env, args) + Agent.__init__(self, env, args) self.actor, self.vf, self.vf_target, self.qf_1, self.qf_2 = models self.actor_optimizer, self.vf_optimizer = optims[0:2] @@ -277,7 +277,7 @@ def save_params(self, n_episode: int): if self.hyper_params["AUTO_ENTROPY_TUNING"]: params["alpha_optim"] = self.alpha_optimizer.state_dict() - AbstractAgent.save_params(self, params, n_episode) + Agent.save_params(self, params, n_episode) def write_log( self, i: int, loss: np.ndarray, score: float = 0.0, delayed_update: int = 1 diff --git a/algorithms/td3/agent.py b/algorithms/td3/agent.py index b9f0d41c..63ecefc8 100644 --- a/algorithms/td3/agent.py +++ b/algorithms/td3/agent.py @@ -16,7 +16,7 @@ import torch.nn.functional as F import wandb -from algorithms.common.abstract.agent import Agent as AbstractAgent +from algorithms.common.abstract.agent import Agent from algorithms.common.buffer.replay_buffer import ReplayBuffer import algorithms.common.helper_functions as common_utils from algorithms.common.noise import GaussianNoise @@ -24,7 +24,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class Agent(AbstractAgent): +class TD3Agent(Agent): """ActorCritic interacting with environment. 
Attributes: @@ -66,7 +66,7 @@ def __init__( noise (GaussianNoise): random noise for exploration """ - AbstractAgent.__init__(self, env, args) + Agent.__init__(self, env, args) self.actor, self.actor_target = models[0:2] self.critic_1, self.critic_2 = models[2:4] @@ -225,7 +225,7 @@ def save_params(self, n_episode: int): "critic_optim": self.critic_optimizer.state_dict(), } - AbstractAgent.save_params(self, params, n_episode) + Agent.save_params(self, params, n_episode) def write_log( self, i: int, loss: np.ndarray, score: float = 0.0, delayed_update: int = 1 diff --git a/examples/lunarlander_continuous_v2/a2c.py b/examples/lunarlander_continuous_v2/a2c.py index cb963f62..7173eb60 100644 --- a/examples/lunarlander_continuous_v2/a2c.py +++ b/examples/lunarlander_continuous_v2/a2c.py @@ -11,7 +11,7 @@ import torch import torch.optim as optim -from algorithms.a2c.agent import Agent +from algorithms.a2c.agent import A2CAgent from algorithms.common.networks.mlp import MLP, GaussianDist device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -65,7 +65,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # create an agent - agent = Agent(env, args, hyper_params, models, optims) + agent = A2CAgent(env, args, hyper_params, models, optims) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/bc-ddpg.py b/examples/lunarlander_continuous_v2/bc-ddpg.py index 0c266dc0..1a6e7b88 100644 --- a/examples/lunarlander_continuous_v2/bc-ddpg.py +++ b/examples/lunarlander_continuous_v2/bc-ddpg.py @@ -13,7 +13,7 @@ import torch import torch.optim as optim -from algorithms.bc.ddpg_agent import Agent +from algorithms.bc.ddpg_agent import BCDDPGAgent from algorithms.common.networks.mlp import MLP from algorithms.common.noise import OUNoise from examples.lunarlander_continuous_v2.utils import LunarLanderContinuousHER @@ -114,10 +114,10 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # HER - HER = LunarLanderContinuousHER if hyper_params["USE_HER"] else None + her = LunarLanderContinuousHER() if hyper_params["USE_HER"] else None # create an agent - agent = Agent(env, args, hyper_params, models, optims, noise, HER) + agent = BCDDPGAgent(env, args, hyper_params, models, optims, noise, her) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/bc-sac.py b/examples/lunarlander_continuous_v2/bc-sac.py index a2fbac7b..e3258b58 100644 --- a/examples/lunarlander_continuous_v2/bc-sac.py +++ b/examples/lunarlander_continuous_v2/bc-sac.py @@ -12,7 +12,7 @@ import torch import torch.optim as optim -from algorithms.bc.sac_agent import Agent +from algorithms.bc.sac_agent import BCSACAgent from algorithms.common.networks.mlp import MLP, FlattenMLP, TanhGaussianDistParams from examples.lunarlander_continuous_v2.utils import LunarLanderContinuousHER @@ -117,10 +117,10 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim) # HER - HER = LunarLanderContinuousHER if hyper_params["USE_HER"] else None + her = LunarLanderContinuousHER() if hyper_params["USE_HER"] else None # create an agent - agent = Agent(env, args, hyper_params, models, optims, target_entropy, HER) + agent = BCSACAgent(env, args, hyper_params, models, optims, target_entropy, her) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/ddpg.py b/examples/lunarlander_continuous_v2/ddpg.py 
index 5da373f4..0db10528 100644 --- a/examples/lunarlander_continuous_v2/ddpg.py +++ b/examples/lunarlander_continuous_v2/ddpg.py @@ -13,7 +13,7 @@ from algorithms.common.networks.mlp import MLP from algorithms.common.noise import OUNoise -from algorithms.ddpg.agent import Agent +from algorithms.ddpg.agent import DDPGAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -101,7 +101,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # create an agent - agent = Agent(env, args, hyper_params, models, optims, noise) + agent = DDPGAgent(env, args, hyper_params, models, optims, noise) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/ddpgfd.py b/examples/lunarlander_continuous_v2/ddpgfd.py index c4f6c4b3..d5a98f04 100644 --- a/examples/lunarlander_continuous_v2/ddpgfd.py +++ b/examples/lunarlander_continuous_v2/ddpgfd.py @@ -13,7 +13,7 @@ from algorithms.common.networks.mlp import MLP from algorithms.common.noise import OUNoise -from algorithms.fd.ddpg_agent import Agent +from algorithms.fd.ddpg_agent import DDPGfDAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -109,7 +109,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # create an agent - agent = Agent(env, args, hyper_params, models, optims, noise) + agent = DDPGfDAgent(env, args, hyper_params, models, optims, noise) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/per-ddpg.py b/examples/lunarlander_continuous_v2/per-ddpg.py index fc038c65..33769669 100644 --- a/examples/lunarlander_continuous_v2/per-ddpg.py +++ b/examples/lunarlander_continuous_v2/per-ddpg.py @@ -13,7 +13,7 @@ from algorithms.common.networks.mlp import MLP from algorithms.common.noise import OUNoise -from algorithms.per.ddpg_agent import Agent +from algorithms.per.ddpg_agent import DDPGPERAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -104,7 +104,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # create an agent - agent = Agent(env, args, hyper_params, models, optims, noise) + agent = DDPGPERAgent(env, args, hyper_params, models, optims, noise) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/ppo.py b/examples/lunarlander_continuous_v2/ppo.py index 26b4aac4..dafea19e 100644 --- a/examples/lunarlander_continuous_v2/ppo.py +++ b/examples/lunarlander_continuous_v2/ppo.py @@ -14,7 +14,7 @@ from algorithms.common.env.utils import env_generator, make_envs from algorithms.common.networks.mlp import MLP, GaussianDist -from algorithms.ppo.agent import Agent +from algorithms.ppo.agent import PPOAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -92,7 +92,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # create an agent - agent = Agent(env_single, env_multi, args, hyper_params, models, optims) + agent = PPOAgent(env_single, env_multi, args, hyper_params, models, optims) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/sac.py b/examples/lunarlander_continuous_v2/sac.py index ae5a4cfa..344d6c9a 100644 --- a/examples/lunarlander_continuous_v2/sac.py +++ b/examples/lunarlander_continuous_v2/sac.py @@ -13,7 +13,7 @@ import torch.optim as optim from algorithms.common.networks.mlp import MLP, FlattenMLP, 
TanhGaussianDistParams -from algorithms.sac.agent import Agent +from algorithms.sac.agent import SACAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -106,7 +106,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim) # create an agent - agent = Agent(env, args, hyper_params, models, optims, target_entropy) + agent = SACAgent(env, args, hyper_params, models, optims, target_entropy) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/sacfd.py b/examples/lunarlander_continuous_v2/sacfd.py index 82fdbe4a..665c9407 100644 --- a/examples/lunarlander_continuous_v2/sacfd.py +++ b/examples/lunarlander_continuous_v2/sacfd.py @@ -13,7 +13,7 @@ import torch.optim as optim from algorithms.common.networks.mlp import MLP, FlattenMLP, TanhGaussianDistParams -from algorithms.fd.sac_agent import Agent +from algorithms.fd.sac_agent import SACfDAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -112,7 +112,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim) # create an agent - agent = Agent(env, args, hyper_params, models, optims, target_entropy) + agent = SACfDAgent(env, args, hyper_params, models, optims, target_entropy) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/td3.py b/examples/lunarlander_continuous_v2/td3.py index 9015c6bf..b73a05c5 100644 --- a/examples/lunarlander_continuous_v2/td3.py +++ b/examples/lunarlander_continuous_v2/td3.py @@ -13,7 +13,7 @@ from algorithms.common.networks.mlp import MLP from algorithms.common.noise import GaussianNoise -from algorithms.td3.agent import Agent +from algorithms.td3.agent import TD3Agent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -115,7 +115,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # create an agent - agent = Agent(env, args, hyper_params, models, optims, noise) + agent = TD3Agent(env, args, hyper_params, models, optims, noise) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/utils.py b/examples/lunarlander_continuous_v2/utils.py index 0ab7fddd..7e8a5007 100644 --- a/examples/lunarlander_continuous_v2/utils.py +++ b/examples/lunarlander_continuous_v2/utils.py @@ -7,7 +7,7 @@ import numpy as np -from algorithms.common.abstract.her import HER as AbstractHER +from algorithms.common.abstract.her import HER from algorithms.common.abstract.reward_fn import RewardFn @@ -22,7 +22,7 @@ def __call__(self, transition: tuple, goal_state: np.ndarray) -> np.float64: return np.float64(-1.0) -class LunarLanderContinuousHER(AbstractHER): +class LunarLanderContinuousHER(HER): """HER for LunarLanderContinuous-v2 environment. 
Attributes: @@ -33,7 +33,7 @@ class LunarLanderContinuousHER(AbstractHER): def __init__(self, reward_func: RewardFn = L1DistanceRewardFn): """Initialization.""" - AbstractHER.__init__(self, reward_func=reward_func) + HER.__init__(self, reward_func=reward_func) # pylint: disable=attribute-defined-outside-init def fetch_desired_states_from_demo(self, demo: list): diff --git a/examples/lunarlander_v2/dqfd.py b/examples/lunarlander_v2/dqfd.py index 2c9155dc..3b373c96 100644 --- a/examples/lunarlander_v2/dqfd.py +++ b/examples/lunarlander_v2/dqfd.py @@ -12,7 +12,7 @@ import torch.optim as optim from algorithms.dqn.networks import C51DuelingMLP -from algorithms.fd.dqn_agent import Agent +from algorithms.fd.dqn_agent import DQNfDAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -89,7 +89,7 @@ def get_fc_model(): models = (dqn, dqn_target) # create an agent - agent = Agent(env, args, hyper_params, models, dqn_optim) + agent = DQNfDAgent(env, args, hyper_params, models, dqn_optim) # run if args.test: diff --git a/examples/lunarlander_v2/dqn.py b/examples/lunarlander_v2/dqn.py index 54a24de7..c824dfaf 100644 --- a/examples/lunarlander_v2/dqn.py +++ b/examples/lunarlander_v2/dqn.py @@ -11,7 +11,7 @@ import torch import torch.optim as optim -from algorithms.dqn.agent import Agent +from algorithms.dqn.agent import DQNAgent from algorithms.dqn.networks import C51DuelingMLP device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -85,7 +85,7 @@ def get_fc_model(): models = (dqn, dqn_target) # create an agent - agent = Agent(env, args, hyper_params, models, dqn_optim) + agent = DQNAgent(env, args, hyper_params, models, dqn_optim) # run if args.test: diff --git a/examples/pong_no_frameskip_v4/dqn.py b/examples/pong_no_frameskip_v4/dqn.py index 08d81da0..074b987e 100644 --- a/examples/pong_no_frameskip_v4/dqn.py +++ b/examples/pong_no_frameskip_v4/dqn.py @@ -12,7 +12,7 @@ import torch.optim as optim from algorithms.common.networks.cnn import CNNLayer -from algorithms.dqn.agent import Agent +from algorithms.dqn.agent import DQNAgent from algorithms.dqn.networks import IQNCNN, IQNMLP device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -101,7 +101,7 @@ def get_cnn_model(): models = (dqn, dqn_target) # create an agent - agent = Agent(env, args, hyper_params, models, dqn_optim) + agent = DQNAgent(env, args, hyper_params, models, dqn_optim) agent.env_name = env_name # run diff --git a/examples/reacher_v2/bc-ddpg.py b/examples/reacher_v2/bc-ddpg.py index 1a8a78aa..1e11e88a 100644 --- a/examples/reacher_v2/bc-ddpg.py +++ b/examples/reacher_v2/bc-ddpg.py @@ -12,7 +12,7 @@ import torch import torch.optim as optim -from algorithms.bc.ddpg_agent import Agent +from algorithms.bc.ddpg_agent import BCDDPGAgent from algorithms.common.networks.mlp import MLP from algorithms.common.noise import OUNoise from examples.reacher_v2.utils import ReacherHER @@ -36,7 +36,7 @@ "INITIAL_RANDOM_ACTION": 10000, "MULTIPLE_LEARN": 1, # HER - "USE_HER": True, + "USE_HER": False, "SUCCESS_SCORE": -5.0, "DESIRED_STATES_FROM_DEMO": False, } @@ -110,10 +110,10 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # HER - HER = ReacherHER if hyper_params["USE_HER"] else None + her = ReacherHER() if hyper_params["USE_HER"] else None # create an agent - agent = Agent(env, args, hyper_params, models, optims, noise, HER) + agent = BCDDPGAgent(env, args, hyper_params, models, optims, noise, her) # run if 
args.test: diff --git a/examples/reacher_v2/bc-sac.py b/examples/reacher_v2/bc-sac.py index 2d1558a6..5773eb95 100644 --- a/examples/reacher_v2/bc-sac.py +++ b/examples/reacher_v2/bc-sac.py @@ -12,7 +12,7 @@ import torch import torch.optim as optim -from algorithms.bc.sac_agent import Agent +from algorithms.bc.sac_agent import BCSACAgent from algorithms.common.networks.mlp import MLP, FlattenMLP, TanhGaussianDistParams from examples.reacher_v2.utils import ReacherHER @@ -114,10 +114,10 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim) # HER - HER = ReacherHER if hyper_params["USE_HER"] else None + her = ReacherHER() if hyper_params["USE_HER"] else None # create an agent - agent = Agent(env, args, hyper_params, models, optims, target_entropy, HER) + agent = BCSACAgent(env, args, hyper_params, models, optims, target_entropy, her) # run if args.test: diff --git a/examples/reacher_v2/ddpg.py b/examples/reacher_v2/ddpg.py index d625c8c5..2a066f80 100644 --- a/examples/reacher_v2/ddpg.py +++ b/examples/reacher_v2/ddpg.py @@ -13,7 +13,7 @@ from algorithms.common.networks.mlp import MLP from algorithms.common.noise import OUNoise -from algorithms.ddpg.agent import Agent +from algorithms.ddpg.agent import DDPGAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -101,7 +101,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # create an agent - agent = Agent(env, args, hyper_params, models, optims, noise) + agent = DDPGAgent(env, args, hyper_params, models, optims, noise) # run if args.test: diff --git a/examples/reacher_v2/sac.py b/examples/reacher_v2/sac.py index 1f37124f..3f2607d2 100644 --- a/examples/reacher_v2/sac.py +++ b/examples/reacher_v2/sac.py @@ -13,7 +13,7 @@ import torch.optim as optim from algorithms.common.networks.mlp import MLP, FlattenMLP, TanhGaussianDistParams -from algorithms.sac.agent import Agent +from algorithms.sac.agent import SACAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -106,7 +106,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, vf_optim, qf_1_optim, qf_2_optim) # create an agent - agent = Agent(env, args, hyper_params, models, optims, target_entropy) + agent = SACAgent(env, args, hyper_params, models, optims, target_entropy) # run if args.test: diff --git a/examples/reacher_v2/td3.py b/examples/reacher_v2/td3.py index c553a649..c488afad 100644 --- a/examples/reacher_v2/td3.py +++ b/examples/reacher_v2/td3.py @@ -13,7 +13,7 @@ from algorithms.common.networks.mlp import MLP from algorithms.common.noise import GaussianNoise -from algorithms.td3.agent import Agent +from algorithms.td3.agent import TD3Agent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -115,7 +115,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # create an agent - agent = Agent(env, args, hyper_params, models, optims, noise) + agent = TD3Agent(env, args, hyper_params, models, optims, noise) # run if args.test: diff --git a/examples/reacher_v2/utils.py b/examples/reacher_v2/utils.py index ec228599..d2c73ba7 100644 --- a/examples/reacher_v2/utils.py +++ b/examples/reacher_v2/utils.py @@ -7,7 +7,7 @@ import numpy as np -from algorithms.common.abstract.her import HER as AbstractHER +from algorithms.common.abstract.her import 
HER
 from algorithms.common.abstract.reward_fn import RewardFn
@@ -22,12 +22,12 @@ def __call__(self, transition: tuple, _) -> np.float64:
         return reward_dist + reward_ctrl
-class ReacherHER(AbstractHER):
+class ReacherHER(HER):
     """HER for Reacher-v2 environment."""
     def __init__(self, reward_func: RewardFn = ReacherRewardFn):
         """Initialization."""
-        AbstractHER.__init__(self, reward_func=reward_func)
+        HER.__init__(self, reward_func=reward_func)
     def fetch_desired_states_from_demo(self, _: list):
         """Return desired goal states from demonstration data.

From bd76239684d55a92893106a3ceee9cde90294b4d Mon Sep 17 00:00:00 2001
From: darthegg
Date: Thu, 11 Apr 2019 15:09:21 +0900
Subject: [PATCH 3/5] Change Classname DDPGPER to PERDDPG in Class diagram

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b706af5f..f45b5915 100644
--- a/README.md
+++ b/README.md
@@ -111,7 +111,7 @@ python -h
 ### Class Diagram
 Class diagram drawn on [e447f3e](https://github.com/medipixel/rl_algorithms/commit/e447f3e743f6f85505f2275b646e46f0adcf8f89). This won't be frequently updated.
-![rl_algorithms_cls](https://user-images.githubusercontent.com/14961526/55703648-26022a80-5a15-11e9-8099-9bbfdffcb96d.png)
+![RL_Algorithms_ClassDiagram](https://user-images.githubusercontent.com/16010242/55934443-812d5a80-5c6b-11e9-9b31-fa8214965a55.png)
 ### W&B for logging
 We use [W&B](https://www.wandb.com/) for logging of network parameters and others. For more details, read [W&B tutorial](https://docs.wandb.com/docs/started.html).

From 492788002c6aaf4ca1c0357c38aad5f8d175ff0c Mon Sep 17 00:00:00 2001
From: darthegg
Date: Thu, 11 Apr 2019 15:14:32 +0900
Subject: [PATCH 4/5] Change Classname DDPGPER to PERDDPG in Class diagram

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f45b5915..1dbcc986 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,8 @@ python -h
 - Load the saved models and optimizers at the beginning.
 ### Class Diagram
-Class diagram drawn on [e447f3e](https://github.com/medipixel/rl_algorithms/commit/e447f3e743f6f85505f2275b646e46f0adcf8f89). This won't be frequently updated.
+Class diagram drawn on [bd76239] (https://github.com/medipixel/rl_algorithms/pull/135/commits/bd76239684d55a92893106a3ceee9cde90294b4d)
+This won't be frequently updated.
 ![RL_Algorithms_ClassDiagram](https://user-images.githubusercontent.com/16010242/55934443-812d5a80-5c6b-11e9-9b31-fa8214965a55.png)
 ### W&B for logging

From 178b063c98d70dda23e5bd79ab08c0e771473ecf Mon Sep 17 00:00:00 2001
From: darthegg
Date: Thu, 11 Apr 2019 15:42:26 +0900
Subject: [PATCH 5/5] Change Class diagram hyperlink in README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1dbcc986..f3f340a7 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,7 @@ python -h
 - Load the saved models and optimizers at the beginning.
 ### Class Diagram
-Class diagram drawn on [bd76239] (https://github.com/medipixel/rl_algorithms/pull/135/commits/bd76239684d55a92893106a3ceee9cde90294b4d)
+Class diagram drawn on [Pull Request #135](https://github.com/medipixel/rl_algorithms/pull/135).
 This won't be frequently updated.
 ![RL_Algorithms_ClassDiagram](https://user-images.githubusercontent.com/16010242/55934443-812d5a80-5c6b-11e9-9b31-fa8214965a55.png)