diff --git a/README.md b/README.md index 04e387e1..b706af5f 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,8 @@ We are warmly welcoming external contributors! :) 6. [Behaviour Cloning (BC with DDPG, SAC)](https://github.com/medipixel/rl_algorithms/tree/master/algorithms/bc) 7. [Prioritized Experience Replay (PER with DDPG)](https://github.com/medipixel/rl_algorithms/tree/master/algorithms/per) 8. [From Demonstrations (DDPGfD, SACfD, DQfD)](https://github.com/medipixel/rl_algorithms/tree/master/algorithms/fd) -9. [Rainbow DQN (without NoisyNet)](https://github.com/medipixel/rl_algorithms/tree/master/algorithms/dqn) -10. [Rainbow IQN (without DuelingNet & NoisyNet)](https://github.com/medipixel/rl_algorithms/tree/master/algorithms/dqn) +9. [Rainbow DQN](https://github.com/medipixel/rl_algorithms/tree/master/algorithms/dqn) +10. [Rainbow IQN (without DuelingNet)](https://github.com/medipixel/rl_algorithms/tree/master/algorithms/dqn) - DuelingNet [degrades performance](https://github.com/medipixel/rl_algorithms/pull/137) ## Getting started We have tested each algorithm on some of the following environments. @@ -109,6 +109,10 @@ python -h - `--load-from ` - Load the saved models and optimizers at the beginning. +### Class Diagram +Class diagram drawn on [e447f3e](https://github.com/medipixel/rl_algorithms/commit/e447f3e743f6f85505f2275b646e46f0adcf8f89). This won't be frequently updated. +![rl_algorithms_cls](https://user-images.githubusercontent.com/14961526/55703648-26022a80-5a15-11e9-8099-9bbfdffcb96d.png) + ### W&B for logging We use [W&B](https://www.wandb.com/) for logging of network parameters and others. For more details, read [W&B tutorial](https://docs.wandb.com/docs/started.html). @@ -128,5 +132,6 @@ We use [W&B](https://www.wandb.com/) for logging of network parameters and other 12. [Z. Wang et al., "Dueling Network Architectures for Deep Reinforcement Learning." arXiv preprint arXiv:1511.06581, 2015.](https://arxiv.org/pdf/1511.06581.pdf) 13. [T. Hester et al., "Deep Q-learning from Demonstrations." arXiv preprint arXiv:1704.03732, 2017.](https://arxiv.org/pdf/1704.03732.pdf) 14. [M. G. Bellemare et al., "A Distributional Perspective on Reinforcement Learning." arXiv preprint arXiv:1707.06887, 2017.](https://arxiv.org/pdf/1707.06887.pdf) -15. [M. Hessel et al., "Rainbow: Combining Improvements in Deep Reinforcement Learning." arXiv preprint arXiv:1710.02298, 2017.](https://arxiv.org/pdf/1710.02298.pdf) -16. [W. Dabney et al., "Implicit Quantile Networks for Distributional Reinforcement Learning." arXiv preprint arXiv:1806.06923, 2018.](https://arxiv.org/pdf/1806.06923.pdf) +15. [M. Fortunato et al., "Noisy Networks for Exploration." arXiv preprint arXiv:1706.10295, 2017.](https://arxiv.org/pdf/1706.10295.pdf) +16. [M. Hessel et al., "Rainbow: Combining Improvements in Deep Reinforcement Learning." arXiv preprint arXiv:1710.02298, 2017.](https://arxiv.org/pdf/1710.02298.pdf) +17. [W. Dabney et al., "Implicit Quantile Networks for Distributional Reinforcement Learning." 
arXiv preprint arXiv:1806.06923, 2018.](https://arxiv.org/pdf/1806.06923.pdf) diff --git a/algorithms/bc/sac_agent.py b/algorithms/bc/sac_agent.py index d77b6e04..8b532016 100644 --- a/algorithms/bc/sac_agent.py +++ b/algorithms/bc/sac_agent.py @@ -115,8 +115,9 @@ def _add_transition_to_memory(self, transition: Tuple[np.ndarray, ...]): def update_model(self) -> Tuple[torch.Tensor, ...]: """Train the model after each episode.""" - experiences = self.memory.sample() - demos = self.demo_memory.sample() + self.update_step += 1 + + experiences, demos = self.memory.sample(), self.demo_memory.sample() states, actions, rewards, next_states, dones = experiences demo_states, demo_actions, _, _, _ = demos @@ -169,7 +170,7 @@ def update_model(self) -> Tuple[torch.Tensor, ...]: vf_loss.backward() self.vf_optimizer.step() - if self.total_step % self.hyper_params["DELAYED_UPDATE"] == 0: + if self.update_step % self.hyper_params["POLICY_UPDATE_FREQ"] == 0: # bc loss qf_mask = torch.gt( self.qf_1(demo_states, demo_actions), @@ -223,7 +224,7 @@ def update_model(self) -> Tuple[torch.Tensor, ...]: ) def write_log( - self, i: int, loss: np.ndarray, score: float = 0.0, delayed_update: int = 1 + self, i: int, loss: np.ndarray, score: float = 0.0, policy_update_freq: int = 1 ): """Write log about loss and score""" total_loss = loss.sum() @@ -238,7 +239,7 @@ def write_log( self.total_step, score, total_loss, - loss[0] * delayed_update, # actor loss + loss[0] * policy_update_freq, # actor loss loss[1], # qf_1 loss loss[2], # qf_2 loss loss[3], # vf loss @@ -252,7 +253,7 @@ def write_log( { "score": score, "total loss": total_loss, - "actor loss": loss[0] * delayed_update, + "actor loss": loss[0] * policy_update_freq, "qf_1 loss": loss[1], "qf_2 loss": loss[2], "vf loss": loss[3], diff --git a/algorithms/common/abstract/reward_fn.py b/algorithms/common/abstract/reward_fn.py index a64134c7..532865df 100644 --- a/algorithms/common/abstract/reward_fn.py +++ b/algorithms/common/abstract/reward_fn.py @@ -13,8 +13,6 @@ class RewardFn(ABC): """Abstract class for computing reward. 
New compute_reward class should redefine __call__() - Attributes: - """ @abstractmethod diff --git a/algorithms/common/buffer/segment_tree.py b/algorithms/common/buffer/segment_tree.py index 0e2d44cf..30c60f2f 100644 --- a/algorithms/common/buffer/segment_tree.py +++ b/algorithms/common/buffer/segment_tree.py @@ -102,7 +102,8 @@ def sum(self, start: int = 0, end: int = 0) -> float: def retrieve(self, upperbound: float) -> int: """Find the highest index `i` about upper bound in the tree""" - assert 0 <= upperbound <= self.sum() + 1e-5 + # TODO: Check assert case and fix bug + assert 0 <= upperbound <= self.sum() + 1e-5, "upperbound: {}".format(upperbound) idx = 1 diff --git a/algorithms/common/env/atari_wrappers.py b/algorithms/common/env/atari_wrappers.py index 8a4ffee5..d3e65f8e 100644 --- a/algorithms/common/env/atari_wrappers.py +++ b/algorithms/common/env/atari_wrappers.py @@ -8,7 +8,7 @@ import cv2 import gym -from gym import spaces +import gym.spaces as spaces import numpy as np os.environ.setdefault("PATH", "") diff --git a/algorithms/common/networks/mlp.py b/algorithms/common/networks/mlp.py index 66ac125f..4734c393 100644 --- a/algorithms/common/networks/mlp.py +++ b/algorithms/common/networks/mlp.py @@ -31,6 +31,14 @@ def concat( return in_concat +def init_layer_uniform(layer: nn.Linear, init_w: float = 3e-3) -> nn.Linear: + """Init uniform parameters on the single layer""" + layer.weight.data.uniform_(-init_w, init_w) + layer.bias.data.uniform_(-init_w, init_w) + + return layer + + class MLP(nn.Module): """Baseline of Multilayer perceptron. @@ -53,9 +61,10 @@ def __init__( hidden_sizes: list, hidden_activation: Callable = F.relu, output_activation: Callable = identity, + linear_layer: nn.Module = nn.Linear, use_output_layer: bool = True, n_category: int = -1, - init_w: float = 3e-3, + init_fn: Callable = init_layer_uniform, ): """Initialization. 
@@ -65,9 +74,10 @@ def __init__( hidden_sizes (list): number of hidden layers hidden_activation (function): activation function of hidden layers output_activation (function): activation function of output layer + linear_layer (nn.Module): linear layer of mlp use_output_layer (bool): whether or not to use the last layer n_category (int): category number (-1 if the action is continuous) - init_w (float): weight initialization bound for the last layer + init_fn (Callable): weight initialization function bound for the last layer """ super(MLP, self).__init__() @@ -77,6 +87,7 @@ def __init__( self.output_size = output_size self.hidden_activation = hidden_activation self.output_activation = output_activation + self.linear_layer = linear_layer self.use_output_layer = use_output_layer self.n_category = n_category @@ -84,16 +95,15 @@ def __init__( self.hidden_layers: list = [] in_size = self.input_size for i, next_size in enumerate(hidden_sizes): - fc = nn.Linear(in_size, next_size) + fc = self.linear_layer(in_size, next_size) in_size = next_size self.__setattr__("hidden_fc{}".format(i), fc) self.hidden_layers.append(fc) # set output layers if self.use_output_layer: - self.output_layer = nn.Linear(in_size, output_size) - self.output_layer.weight.data.uniform_(-init_w, init_w) - self.output_layer.bias.data.uniform_(-init_w, init_w) + self.output_layer = self.linear_layer(in_size, output_size) + self.output_layer = init_fn(self.output_layer) else: self.output_layer = identity self.output_activation = identity @@ -137,7 +147,7 @@ def __init__( mu_activation: Callable = torch.tanh, log_std_min: float = -20, log_std_max: float = 2, - init_w: float = 3e-3, + init_fn: Callable = init_layer_uniform, ): """Initialization.""" super(GaussianDist, self).__init__( @@ -155,13 +165,11 @@ def __init__( # set log_std layer self.log_std_layer = nn.Linear(in_size, output_size) - self.log_std_layer.weight.data.uniform_(-init_w, init_w) - self.log_std_layer.bias.data.uniform_(-init_w, init_w) + self.log_std_layer = init_fn(self.log_std_layer) # set mean layer self.mu_layer = nn.Linear(in_size, output_size) - self.mu_layer.weight.data.uniform_(-init_w, init_w) - self.mu_layer.bias.data.uniform_(-init_w, init_w) + self.mu_layer = init_fn(self.mu_layer) def get_dist_params(self, x: torch.Tensor) -> Tuple[torch.Tensor, ...]: """Return gausian distribution parameters.""" @@ -229,7 +237,7 @@ def __init__( output_size: int, hidden_sizes: list, hidden_activation: Callable = F.relu, - init_w: float = 3e-3, + init_fn: Callable = init_layer_uniform, ): """Initialization.""" super(CategoricalDist, self).__init__( @@ -244,8 +252,7 @@ def __init__( # set log_std layer self.last_layer = nn.Linear(in_size, output_size) - self.last_layer.weight.data.uniform_(-init_w, init_w) - self.last_layer.bias.data.uniform_(-init_w, init_w) + self.last_layer = init_fn(self.last_layer) def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, ...]: """Forward method implementation.""" diff --git a/algorithms/common/noise.py b/algorithms/common/noise.py index b6470524..25f9190d 100644 --- a/algorithms/common/noise.py +++ b/algorithms/common/noise.py @@ -15,21 +15,23 @@ class GaussianNoise: def __init__( self, + action_dim: int, min_sigma: float = 1.0, max_sigma: float = 1.0, decay_period: int = 1000000, ): """Initialization.""" + self.action_dim = action_dim self.max_sigma = max_sigma self.min_sigma = min_sigma self.decay_period = decay_period - def sample(self, action_size: int, t: int = 0) -> float: + def sample(self, t: int = 0) -> float: """Get 
an action with gaussian noise.""" sigma = self.max_sigma - (self.max_sigma - self.min_sigma) * min( 1.0, t / self.decay_period ) - return np.random.normal(0, sigma, size=action_size) + return np.random.normal(0, sigma, size=self.action_dim) class OUNoise: diff --git a/algorithms/dqn/agent.py b/algorithms/dqn/agent.py index dd81c76b..17884606 100644 --- a/algorithms/dqn/agent.py +++ b/algorithms/dqn/agent.py @@ -7,13 +7,15 @@ https://arxiv.org/pdf/1509.06461.pdf (Double DQN) https://arxiv.org/pdf/1511.05952.pdf (PER) https://arxiv.org/pdf/1511.06581.pdf (Dueling) + https://arxiv.org/pdf/1706.10295.pdf (NoisyNet) https://arxiv.org/pdf/1707.06887.pdf (C51) + https://arxiv.org/pdf/1710.02298.pdf (Rainbow) https://arxiv.org/pdf/1806.06923.pdf (IQN) """ import argparse -import datetime import os +import time from typing import Tuple import gym @@ -191,7 +193,7 @@ def _get_dqn_loss( gamma=gamma, ) - def update_model(self) -> torch.Tensor: + def update_model(self) -> Tuple[torch.Tensor, torch.Tensor]: """Train the model after each episode.""" # 1 step loss experiences_1 = self.memory.sample(self.beta) @@ -239,6 +241,10 @@ def update_model(self) -> torch.Tensor: fraction = min(float(self.i_episode) / self.args.episode_num, 1.0) self.beta = self.beta + fraction * (1.0 - self.beta) + if self.hyper_params["USE_NOISY_NET"]: + self.dqn.reset_noise() + self.dqn_target.reset_noise() + return loss.data, q_values.mean().data def load_params(self, path: str): @@ -263,11 +269,11 @@ def save_params(self, n_episode: int): Agent.save_params(self, params, n_episode) - def write_log(self, i: int, loss: np.ndarray, score: float): + def write_log(self, i: int, loss: np.ndarray, score: float, avg_time_cost: float): """Write log about loss and score""" print( "[INFO] episode %d, episode step: %d, total step: %d, total score: %f\n" - "epsilon: %f, loss: %f, avg q-value: %f at %s\n" + "epsilon: %f, loss: %f, avg q-value: %f (spent %.6f sec/step)\n" % ( i, self.episode_step, @@ -276,12 +282,20 @@ def write_log(self, i: int, loss: np.ndarray, score: float): self.epsilon, loss[0], loss[1], - datetime.datetime.now(), + avg_time_cost, ) ) if self.args.log: - wandb.log({"score": score, "dqn loss": loss[0], "epsilon": self.epsilon}) + wandb.log( + { + "score": score, + "epsilon": self.epsilon, + "dqn loss": loss[0], + "avg q values": loss[1], + "time per each step": avg_time_cost, + } + ) # pylint: disable=no-self-use, unnecessary-pass def pretrain(self): @@ -312,6 +326,8 @@ def train(self): done = False score = 0 + t_begin = time.time() + while not done: if self.args.render and self.i_episode >= self.args.render_after: self.env.render() @@ -334,9 +350,12 @@ def train(self): state = next_state score += reward + t_end = time.time() + avg_time_cost = (t_end - t_begin) / self.episode_step + if losses: avg_loss = np.vstack(losses).mean(axis=0) - self.write_log(self.i_episode, avg_loss, score) + self.write_log(self.i_episode, avg_loss, score, avg_time_cost) if self.i_episode % self.args.save_period == 0: self.save_params(self.i_episode) diff --git a/algorithms/dqn/linear.py b/algorithms/dqn/linear.py new file mode 100644 index 00000000..b2907013 --- /dev/null +++ b/algorithms/dqn/linear.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +"""Linear module for dqn algorithms + +- Author: Kh Kim +- Contact: kh.kim@medipixel.io +""" + +import math + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +class 
NoisyLinear(nn.Module): + """Noisy linear module for NoisyNet. + + References: + https://github.com/higgsfield/RL-Adventure/blob/master/5.noisy%20dqn.ipynb + https://github.com/Kaixhin/Rainbow/blob/master/model.py + + Attributes: + in_features (int): input size of linear module + out_features (int): output size of linear module + std_init (float): initial std value + weight_mu (nn.Parameter): mean value weight parameter + weight_sigma (nn.Parameter): std value weight parameter + bias_mu (nn.Parameter): mean value bias parameter + bias_sigma (nn.Parameter): std value bias parameter + + """ + + def __init__(self, in_features: int, out_features: int, std_init: float = 0.5): + """Initialization.""" + super(NoisyLinear, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.std_init = std_init + + self.weight_mu = nn.Parameter(torch.Tensor(out_features, in_features)) + self.weight_sigma = nn.Parameter(torch.Tensor(out_features, in_features)) + self.register_buffer("weight_epsilon", torch.Tensor(out_features, in_features)) + + self.bias_mu = nn.Parameter(torch.Tensor(out_features)) + self.bias_sigma = nn.Parameter(torch.Tensor(out_features)) + self.register_buffer("bias_epsilon", torch.Tensor(out_features)) + + self.reset_parameters() + self.reset_noise() + + def reset_parameters(self): + """Reset trainable network parameters (factorized gaussian noise).""" + mu_range = 1 / math.sqrt(self.in_features) + self.weight_mu.data.uniform_(-mu_range, mu_range) + self.weight_sigma.data.fill_(self.std_init / math.sqrt(self.in_features)) + self.bias_mu.data.uniform_(-mu_range, mu_range) + self.bias_sigma.data.fill_(self.std_init / math.sqrt(self.out_features)) + + @staticmethod + def scale_noise(size: int) -> torch.Tensor: + """Set scale to make noise (factorized gaussian noise).""" + x = torch.FloatTensor(np.random.normal(loc=0.0, scale=1.0, size=size)) + + return x.sign().mul(x.abs().sqrt()) + + def reset_noise(self): + """Make new noise.""" + epsilon_in = self.scale_noise(self.in_features) + epsilon_out = self.scale_noise(self.out_features) + + # outer product + self.weight_epsilon.copy_(epsilon_out.ger(epsilon_in)) + self.bias_epsilon.copy_(epsilon_out) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Forward method implementation. + + We don't use separate statements on train / eval mode. + It doesn't show remarkable difference of performance. + """ + return F.linear( + x, + self.weight_mu + self.weight_sigma * self.weight_epsilon, + self.bias_mu + self.bias_sigma * self.bias_epsilon, + ) + + +class NoisyLinearConstructor: + """Constructor class for changing hyper parameters of NoisyLinear. 
+ + Attributes: + std_init (float): initial std value + + """ + + def __init__(self, std_init: float = 0.5): + """Initialization.""" + self.std_init = std_init + + def __call__(self, in_features: int, out_features: int) -> NoisyLinear: + """Return NoisyLinear instance set hyper parameters""" + return NoisyLinear(in_features, out_features, self.std_init) + + +class NoisyMLPHandler: + """Includes methods to handle noisy linear.""" + + def reset_noise(self): + """Re-sample noise""" + for _, module in self.named_children(): + module.reset_noise() diff --git a/algorithms/dqn/networks.py b/algorithms/dqn/networks.py index a6848750..4ab3c5a4 100644 --- a/algorithms/dqn/networks.py +++ b/algorithms/dqn/networks.py @@ -14,12 +14,13 @@ import torch.nn.functional as F from algorithms.common.networks.cnn import CNN -from algorithms.common.networks.mlp import MLP +from algorithms.common.networks.mlp import MLP, init_layer_uniform +from algorithms.dqn.linear import NoisyMLPHandler device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class DuelingMLP(MLP): +class DuelingMLP(MLP, NoisyMLPHandler): """Multilayer perceptron with dueling construction.""" def __init__( @@ -28,7 +29,8 @@ def __init__( output_size: int, hidden_sizes: list, hidden_activation: Callable = F.relu, - init_w: float = 3e-3, + linear_layer: nn.Module = nn.Linear, + init_fn: Callable = init_layer_uniform, ): """Initialization.""" super(DuelingMLP, self).__init__( @@ -36,21 +38,20 @@ def __init__( output_size=output_size, hidden_sizes=hidden_sizes, hidden_activation=hidden_activation, + linear_layer=linear_layer, use_output_layer=False, ) in_size = hidden_sizes[-1] # set advantage layer - self.advantage_hidden_layer = nn.Linear(in_size, in_size) - self.advantage_layer = nn.Linear(in_size, output_size) - self.advantage_layer.weight.data.uniform_(-init_w, init_w) - self.advantage_layer.bias.data.uniform_(-init_w, init_w) + self.advantage_hidden_layer = self.linear_layer(in_size, in_size) + self.advantage_layer = self.linear_layer(in_size, output_size) + self.advantage_layer = init_fn(self.advantage_layer) # set value layer - self.value_hidden_layer = nn.Linear(in_size, in_size) - self.value_layer = nn.Linear(in_size, 1) - self.value_layer.weight.data.uniform_(-init_w, init_w) - self.value_layer.bias.data.uniform_(-init_w, init_w) + self.value_hidden_layer = self.linear_layer(in_size, in_size) + self.value_layer = self.linear_layer(in_size, 1) + self.value_layer = init_fn(self.value_layer) def _forward_dueling(self, x: torch.Tensor) -> torch.Tensor: adv_x = self.hidden_activation(self.advantage_hidden_layer(x)) @@ -81,8 +82,12 @@ def forward_(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: out = self.fc_layers.forward_(x) return out + def reset_noise(self): + """Re-sample noise for fc layers.""" + self.fc_layers.reset_noise() -class C51DuelingMLP(MLP): + +class C51DuelingMLP(MLP, NoisyMLPHandler): """Multilayered perceptron for C51 with dueling construction.""" def __init__( @@ -94,7 +99,8 @@ def __init__( v_min: int = -10, v_max: int = 10, hidden_activation: Callable = F.relu, - init_w: float = 3e-3, + linear_layer: nn.Module = nn.Linear, + init_fn: Callable = init_layer_uniform, ): """Initialization.""" super(C51DuelingMLP, self).__init__( @@ -102,6 +108,7 @@ def __init__( output_size=action_size, hidden_sizes=hidden_sizes, hidden_activation=hidden_activation, + linear_layer=linear_layer, use_output_layer=False, ) in_size = hidden_sizes[-1] @@ -111,16 +118,14 @@ def __init__( self.v_min, self.v_max = 
v_min, v_max # set advantage layer - self.advantage_hidden_layer = nn.Linear(in_size, in_size) - self.advantage_layer = nn.Linear(in_size, self.output_size) - self.advantage_layer.weight.data.uniform_(-init_w, init_w) - self.advantage_layer.bias.data.uniform_(-init_w, init_w) + self.advantage_hidden_layer = self.linear_layer(in_size, in_size) + self.advantage_layer = self.linear_layer(in_size, self.output_size) + self.advantage_layer = init_fn(self.advantage_layer) # set value layer - self.value_hidden_layer = nn.Linear(in_size, in_size) - self.value_layer = nn.Linear(in_size, self.atom_size) - self.value_layer.weight.data.uniform_(-init_w, init_w) - self.value_layer.bias.data.uniform_(-init_w, init_w) + self.value_hidden_layer = self.linear_layer(in_size, in_size) + self.value_layer = self.linear_layer(in_size, self.atom_size) + self.value_layer = init_fn(self.value_layer) def forward_(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: """Get distribution for atoms.""" @@ -160,8 +165,12 @@ def forward_( out = self.fc_layers.forward_(x, n_tau_samples) return out + def reset_noise(self): + """Re-sample noise for fc layers.""" + self.fc_layers.reset_noise() + -class IQNMLP(MLP): +class IQNMLP(MLP, NoisyMLPHandler): """Multilayered perceptron for IQN with dueling construction. Reference: https://github.com/google/dopamine @@ -175,7 +184,8 @@ def __init__( n_quantiles: int, quantile_embedding_dim: int, hidden_activation: Callable = F.relu, - init_w: float = 3e-3, + linear_layer: nn.Module = nn.Linear, + init_fn: Callable = init_layer_uniform, ): """Initialization.""" super(IQNMLP, self).__init__( @@ -183,6 +193,8 @@ def __init__( output_size=output_size, hidden_sizes=hidden_sizes, hidden_activation=hidden_activation, + linear_layer=linear_layer, + init_fn=init_fn, ) IQNMLP.n_quantiles = n_quantiles @@ -191,9 +203,10 @@ def __init__( self.output_size = output_size # set quantile_net layer - self.quantile_fc_layer = nn.Linear(self.quantile_embedding_dim, self.input_size) - self.quantile_fc_layer.weight.data.uniform_(-init_w, init_w) - self.quantile_fc_layer.bias.data.uniform_(-init_w, init_w) + self.quantile_fc_layer = self.linear_layer( + self.quantile_embedding_dim, self.input_size + ) + self.quantile_fc_layer = init_fn(self.quantile_fc_layer) def forward_( self, state: torch.Tensor, n_tau_samples: int = None diff --git a/algorithms/dqn/utils.py b/algorithms/dqn/utils.py index 53d46e15..ac7d3bc5 100644 --- a/algorithms/dqn/utils.py +++ b/algorithms/dqn/utils.py @@ -45,7 +45,7 @@ def calculate_iqn_loss( # Get the indices of the maximium Q-value across the action dimension. # Shape of replay_next_qt_argmax: (n_tau_prime_samples x batch_size) x 1. - next_actions = target_model(next_states).argmax(dim=1) + next_actions = model(next_states).argmax(dim=1) # double Q next_actions = next_actions[:, None] next_actions = next_actions.repeat(n_tau_prime_samples, 1) @@ -145,7 +145,11 @@ def calculate_c51_loss( delta_z = float(v_max - v_min) / (atom_size - 1) with torch.no_grad(): + # According to noisynet paper, + # it resamples noisynet parameters on online network when using double q + # but we don't because there is no remarkable difference in performance. 
next_actions = model.forward_(next_states)[1].argmax(1) + next_dist = target_model.forward_(next_states)[0] next_dist = next_dist[range(batch_size), next_actions] @@ -155,11 +159,6 @@ def calculate_c51_loss( l = b.floor().long() # noqa: E741 u = b.ceil().long() - # Fix disappearing probability mass when l = b = u (b is int) - # taken from https://github.com/Kaixhin/Rainbow - l[(u > 0) * (l == u)] -= 1 # noqa: E741 - u[(l < (atom_size - 1)) * (l == u)] += 1 # noqa: E741 - offset = ( torch.linspace(0, (batch_size - 1) * atom_size, batch_size) .long() @@ -194,7 +193,11 @@ def calculate_dqn_loss( states, actions, rewards, next_states, dones = experiences[:5] q_values = model(states) + # According to noisynet paper, + # it resamples noisynet parameters on online network when using double q + # but we don't because there is no remarkable difference in performance. next_q_values = model(next_states) + next_target_q_values = target_model(next_states) curr_q_value = q_values.gather(1, actions.long().unsqueeze(1)) diff --git a/algorithms/fd/ddpg_agent.py b/algorithms/fd/ddpg_agent.py index cd690dba..8ee0f124 100644 --- a/algorithms/fd/ddpg_agent.py +++ b/algorithms/fd/ddpg_agent.py @@ -160,3 +160,4 @@ def pretrain(self): avg_loss = np.vstack(pretrain_loss).mean(axis=0) pretrain_loss.clear() self.write_log(0, avg_loss, 0) + print("[INFO] Pre-Train Complete!\n") diff --git a/algorithms/fd/dqn_agent.py b/algorithms/fd/dqn_agent.py index af305621..ac5441da 100644 --- a/algorithms/fd/dqn_agent.py +++ b/algorithms/fd/dqn_agent.py @@ -3,15 +3,11 @@ - Author: Kh Kim, Curt Park - Contact: kh.kim@medipixel.io, curt.park@medipixel.io -- Paper: https://storage.googleapis.com/deepmind-media/dqn/DQNNaturePaper.pdf (DQN) - https://arxiv.org/pdf/1509.06461.pdf (Double DQN) - https://arxiv.org/pdf/1511.05952.pdf (PER) - https://arxiv.org/pdf/1511.06581.pdf (Dueling) - https://arxiv.org/pdf/1704.03732.pdf (DQfD) +- Paper: https://arxiv.org/pdf/1704.03732.pdf (DQfD) """ -import datetime import pickle +import time from typing import Tuple import numpy as np @@ -27,7 +23,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class DQNfDAgent(DQNAgent): +class DQfDAgent(DQNAgent): """DQN interacting with environment. 
Attribute: @@ -142,6 +138,10 @@ def update_model(self) -> Tuple[torch.Tensor, ...]: fraction = min(float(self.i_episode) / self.args.episode_num, 1.0) self.beta = self.beta + fraction * (1.0 - self.beta) + if self.hyper_params["USE_NOISY_NET"]: + self.dqn.reset_noise() + self.dqn_target.reset_noise() + return ( loss.data, dq_loss.data, @@ -150,12 +150,14 @@ def update_model(self) -> Tuple[torch.Tensor, ...]: n_demo, ) - def write_log(self, i: int, avg_loss: np.ndarray, score: float = 0.0): + def write_log( + self, i: int, avg_loss: np.ndarray, score: float, avg_time_cost: float + ): """Write log about loss and score""" print( "[INFO] episode %d, episode step: %d, total step: %d, total score: %f\n" "epsilon: %f, total loss: %f, dq loss: %f, supervised loss: %f\n" - "avg q values: %f, demo num in minibatch: %d at %s\n" + "avg q values: %f, demo num in minibatch: %d (spent %.6f sec/step)\n" % ( i, self.episode_step, @@ -167,7 +169,7 @@ def write_log(self, i: int, avg_loss: np.ndarray, score: float = 0.0): avg_loss[2], avg_loss[3], avg_loss[4], - datetime.datetime.now(), + avg_time_cost, ) ) @@ -179,6 +181,9 @@ def write_log(self, i: int, avg_loss: np.ndarray, score: float = 0.0): "total loss": avg_loss[0], "dq loss": avg_loss[1], "supervised loss": avg_loss[2], + "avg q values": avg_loss[3], + "demo num in minibatch": avg_loss[4], + "time per each step": avg_time_cost, } ) @@ -187,11 +192,14 @@ def pretrain(self): pretrain_loss = list() print("[INFO] Pre-Train %d step." % self.hyper_params["PRETRAIN_STEP"]) for i_step in range(1, self.hyper_params["PRETRAIN_STEP"] + 1): + t_begin = time.time() loss = self.update_model() + t_end = time.time() pretrain_loss.append(loss) # for logging # logging if i_step == 1 or i_step % 100 == 0: avg_loss = np.vstack(pretrain_loss).mean(axis=0) pretrain_loss.clear() - self.write_log(0, avg_loss) + self.write_log(0, avg_loss, 0.0, t_end - t_begin) + print("[INFO] Pre-Train Complete!\n") diff --git a/algorithms/fd/sac_agent.py b/algorithms/fd/sac_agent.py index 30178c76..08429bbe 100644 --- a/algorithms/fd/sac_agent.py +++ b/algorithms/fd/sac_agent.py @@ -79,6 +79,8 @@ def _add_transition_to_memory(self, transition: Tuple[np.ndarray, ...]): # pylint: disable=too-many-statements def update_model(self) -> Tuple[torch.Tensor, ...]: """Train the model after each episode.""" + self.update_step += 1 + experiences = self.memory.sample(self.beta) states, actions, rewards, next_states, dones, weights, indices, eps_d = ( experiences @@ -149,7 +151,7 @@ def update_model(self) -> Tuple[torch.Tensor, ...]: vf_loss.backward() self.vf_optimizer.step() - if self.total_step % self.hyper_params["DELAYED_UPDATE"] == 0: + if self.update_step % self.hyper_params["POLICY_UPDATE_FREQ"] == 0: # actor loss advantage = q_pred - v_pred.detach() actor_loss_element_wise = alpha * log_prob - advantage @@ -212,5 +214,9 @@ def pretrain(self): avg_loss = np.vstack(pretrain_loss).mean(axis=0) pretrain_loss.clear() self.write_log( - 0, avg_loss, 0, delayed_update=self.hyper_params["DELAYED_UPDATE"] + 0, + avg_loss, + 0, + policy_update_freq=self.hyper_params["POLICY_UPDATE_FREQ"], ) + print("[INFO] Pre-Train Complete!\n") diff --git a/algorithms/per/ddpg_agent.py b/algorithms/per/ddpg_agent.py index 3821fd6d..5bfc6349 100644 --- a/algorithms/per/ddpg_agent.py +++ b/algorithms/per/ddpg_agent.py @@ -18,7 +18,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -class DDPGPERAgent(DDPGAgent): +class PERDDPGAgent(DDPGAgent): """ActorCritic interacting with environment. 
Attributes: diff --git a/algorithms/sac/agent.py b/algorithms/sac/agent.py index 0791ca03..fa1c5212 100644 --- a/algorithms/sac/agent.py +++ b/algorithms/sac/agent.py @@ -47,6 +47,7 @@ class SACAgent(Agent): hyper_params (dict): hyper-parameters total_step (int): total step numbers episode_step (int): step number of the current episode + update_step (int): step number of updates i_episode (int): current episode number """ @@ -80,6 +81,7 @@ def __init__( self.curr_state = np.zeros((1,)) self.total_step = 0 self.episode_step = 0 + self.update_step = 0 self.i_episode = 0 # automatic entropy tuning @@ -153,6 +155,8 @@ def _add_transition_to_memory(self, transition: Tuple[np.ndarray, ...]): def update_model(self) -> Tuple[torch.Tensor, ...]: """Train the model after each episode.""" + self.update_step += 1 + experiences = self.memory.sample() states, actions, rewards, next_states, dones = experiences new_actions, log_prob, pre_tanh_value, mu, std = self.actor(states) @@ -203,7 +207,7 @@ def update_model(self) -> Tuple[torch.Tensor, ...]: vf_loss.backward() self.vf_optimizer.step() - if self.total_step % self.hyper_params["DELAYED_UPDATE"] == 0: + if self.update_step % self.hyper_params["POLICY_UPDATE_FREQ"] == 0: # actor loss advantage = q_pred - v_pred.detach() actor_loss = (alpha * log_prob - advantage).mean() @@ -280,7 +284,7 @@ def save_params(self, n_episode: int): Agent.save_params(self, params, n_episode) def write_log( - self, i: int, loss: np.ndarray, score: float = 0.0, delayed_update: int = 1 + self, i: int, loss: np.ndarray, score: float = 0.0, policy_update_freq: int = 1 ): """Write log about loss and score""" total_loss = loss.sum() @@ -295,7 +299,7 @@ def write_log( self.total_step, score, total_loss, - loss[0] * delayed_update, # actor loss + loss[0] * policy_update_freq, # actor loss loss[1], # qf_1 loss loss[2], # qf_2 loss loss[3], # vf loss @@ -308,7 +312,7 @@ def write_log( { "score": score, "total loss": total_loss, - "actor loss": loss[0] * delayed_update, + "actor loss": loss[0] * policy_update_freq, "qf_1 loss": loss[1], "qf_2 loss": loss[2], "vf loss": loss[3], @@ -359,7 +363,10 @@ def train(self): if loss_episode: avg_loss = np.vstack(loss_episode).mean(axis=0) self.write_log( - self.i_episode, avg_loss, score, self.hyper_params["DELAYED_UPDATE"] + self.i_episode, + avg_loss, + score, + self.hyper_params["POLICY_UPDATE_FREQ"], ) if self.i_episode % self.args.save_period == 0: diff --git a/algorithms/td3/agent.py b/algorithms/td3/agent.py index 63ecefc8..c7e982df 100644 --- a/algorithms/td3/agent.py +++ b/algorithms/td3/agent.py @@ -29,20 +29,20 @@ class TD3Agent(Agent): Attributes: memory (ReplayBuffer): replay memory - noise (GaussianNoise): random noise for exploration + exploration_noise (GaussianNoise): random noise for exploration + target_policy_noise (GaussianNoise): random noise for target values actor (nn.Module): actor model to select actions - critic_1 (nn.Module): critic model to predict state values - critic_2 (nn.Module): critic model to predict state values + critic1 (nn.Module): critic model to predict state values + critic2 (nn.Module): critic model to predict state values critic_target1 (nn.Module): target critic model to predict state values critic_target2 (nn.Module): target critic model to predict state values actor_target (nn.Module): target actor model to select actions - critic_optimizer (Optimizer): optimizer for training critic - actor_optimizer (Optimizer): optimizer for training actor + critic_optim (Optimizer): optimizer for training 
critic + actor_optim (Optimizer): optimizer for training actor hyper_params (dict): hyper-parameters curr_state (np.ndarray): temporary storage of the current state - total_step (int): total step numbers - update_step (int): train step numbers - episode_step (int): step number of the current episode + total_steps (int): total step numbers + episode_steps (int): step number of the current episode """ @@ -53,7 +53,8 @@ def __init__( hyper_params: dict, models: tuple, optims: tuple, - noise: GaussianNoise, + exploration_noise: GaussianNoise, + target_policy_noise: GaussianNoise, ): """Initialization. @@ -63,22 +64,24 @@ def __init__( hyper_params (dict): hyper-parameters models (tuple): models including actor and critic optims (tuple): optimizers for actor and critic - noise (GaussianNoise): random noise for exploration + exploration_noise (GaussianNoise): random noise for exploration + target_policy_noise (GaussianNoise): random noise for target values """ Agent.__init__(self, env, args) self.actor, self.actor_target = models[0:2] - self.critic_1, self.critic_2 = models[2:4] + self.critic1, self.critic2 = models[2:4] self.critic_target1, self.critic_target2 = models[4:6] - self.actor_optimizer = optims[0] - self.critic_optimizer = optims[1] + self.actor_optim = optims[0] + self.critic_optim = optims[1] self.hyper_params = hyper_params self.curr_state = np.zeros((1,)) - self.noise = noise - self.total_step = 0 - self.update_step = 0 - self.episode_step = 0 + self.exploration_noise = exploration_noise + self.target_policy_noise = target_policy_noise + self.total_steps = 0 + self.episode_steps = 0 + self.update_steps = 0 self.i_episode = 0 # load the optimizer and model parameters @@ -98,102 +101,93 @@ def select_action(self, state: np.ndarray) -> np.ndarray: self.curr_state = state - if self.total_step < random_action_count and not self.args.test: + if self.total_steps < random_action_count and not self.args.test: return self.env.action_space.sample() state = torch.FloatTensor(state).to(device) - selected_action = self.actor(state) + selected_action = self.actor(state).detach().cpu().numpy() if not self.args.test: - action_size = selected_action.size() - selected_action += torch.FloatTensor( - self.noise.sample(action_size, self.total_step) - ).to(device) - selected_action = torch.clamp(selected_action, -1.0, 1.0) + noise = self.exploration_noise.sample() + selected_action = np.clip(selected_action + noise, -1.0, 1.0) - return selected_action.detach().cpu().numpy() + return selected_action def step(self, action: np.ndarray) -> Tuple[np.ndarray, np.float64, bool]: """Take an action and return the response of the env.""" - self.total_step += 1 - self.episode_step += 1 + self.total_steps += 1 + self.episode_steps += 1 next_state, reward, done, _ = self.env.step(action) - # if last state is not terminal state in episode, done is false - done_bool = ( - 0.0 if self.episode_step == self.args.max_episode_steps else float(done) - ) if not self.args.test: + # if last state is not terminal state in episode, done is false + done_bool = ( + False if self.episode_steps == self.args.max_episode_steps else done + ) self.memory.add(self.curr_state, action, reward, next_state, done_bool) return next_state, reward, done def update_model( - self, - experiences: Tuple[ - torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor - ], + self, experiences: Tuple[torch.Tensor, ...] 
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """Train the model after each episode.""" - self.update_step += 1 + self.update_steps += 1 states, actions, rewards, next_states, dones = experiences masks = 1 - dones # get actions with noise - noise_std, noise_clip = ( - self.hyper_params["TARGET_SMOOTHING_NOISE_STD"], - self.hyper_params["TARGET_SMOOTHING_NOISE_CLIP"], + noise = torch.FloatTensor(self.target_policy_noise.sample()).to(device) + clipped_noise = torch.clamp( + noise, + -self.hyper_params["TARGET_POLICY_NOISE_CLIP"], + self.hyper_params["TARGET_POLICY_NOISE_CLIP"], ) - next_actions = self.actor_target(next_states) - noise = next_actions.data.normal_(0, noise_std).to(device) - noise = noise.clamp(-noise_clip, noise_clip) - next_actions += noise - next_actions = next_actions.clamp(-1.0, 1.0) + next_actions = (self.actor_target(next_states) + clipped_noise).clamp(-1.0, 1.0) # min (Q_1', Q_2') - next_states_actions = torch.cat((next_states, next_actions), dim=-1) - next_values1 = self.critic_target1(next_states_actions) - next_values2 = self.critic_target2(next_states_actions) + next_values1 = self.critic_target1(next_states, next_actions) + next_values2 = self.critic_target2(next_states, next_actions) next_values = torch.min(next_values1, next_values2) # G_t = r + gamma * v(s_{t+1}) if state != Terminal # = r otherwise curr_returns = rewards + self.hyper_params["GAMMA"] * next_values * masks - curr_returns = curr_returns.to(device).detach() + curr_returns = curr_returns.detach() # critic loss - states_actions = torch.cat((states, actions), dim=-1) - values1 = self.critic_1(states_actions) - values2 = self.critic_2(states_actions) - critic_loss1 = F.mse_loss(values1, curr_returns) - critic_loss2 = F.mse_loss(values2, curr_returns) - critic_loss = critic_loss1 + critic_loss2 + values1 = self.critic1(states, actions) + values2 = self.critic2(states, actions) + critic1_loss = F.mse_loss(values1, curr_returns) + critic2_loss = F.mse_loss(values2, curr_returns) # train critic - self.critic_optimizer.zero_grad() + critic_loss = critic1_loss + critic2_loss + self.critic_optim.zero_grad() critic_loss.backward() - self.critic_optimizer.step() + self.critic_optim.step() - if self.update_step % self.hyper_params["DELAYED_UPDATE"] == 0: - # train actor + if self.update_steps % self.hyper_params["POLICY_UPDATE_FREQ"] == 0: + # policy loss actions = self.actor(states) - states_actions = torch.cat((states, actions), dim=-1) - actor_loss = -self.critic_1(states_actions).mean() - self.actor_optimizer.zero_grad() + actor_loss = -self.critic1(states, actions).mean() + + # train actor + self.actor_optim.zero_grad() actor_loss.backward() - self.actor_optimizer.step() + self.actor_optim.step() # update target networks tau = self.hyper_params["TAU"] - common_utils.soft_update(self.critic_1, self.critic_target1, tau) - common_utils.soft_update(self.critic_2, self.critic_target2, tau) + common_utils.soft_update(self.critic1, self.critic_target1, tau) + common_utils.soft_update(self.critic2, self.critic_target2, tau) common_utils.soft_update(self.actor, self.actor_target, tau) else: actor_loss = torch.zeros(1) - return actor_loss.data, critic_loss1.data, critic_loss2.data + return actor_loss.data, critic1_loss.data, critic2_loss.data def load_params(self, path: str): """Load model and optimizer parameters.""" @@ -202,14 +196,14 @@ def load_params(self, path: str): return params = torch.load(path) - self.critic_1.load_state_dict(params["critic_1"]) - self.critic_2.load_state_dict(params["critic_2"]) + 
self.critic1.load_state_dict(params["critic1"]) + self.critic2.load_state_dict(params["critic2"]) self.critic_target1.load_state_dict(params["critic_target1"]) self.critic_target2.load_state_dict(params["critic_target2"]) - self.critic_optimizer.load_state_dict(params["critic_optim"]) + self.critic_optim.load_state_dict(params["critic_optim"]) self.actor.load_state_dict(params["actor"]) self.actor_target.load_state_dict(params["actor_target"]) - self.actor_optimizer.load_state_dict(params["actor_optim"]) + self.actor_optim.load_state_dict(params["actor_optim"]) print("[INFO] loaded the model and optimizer from", path) def save_params(self, n_episode: int): @@ -217,30 +211,31 @@ def save_params(self, n_episode: int): params = { "actor": self.actor.state_dict(), "actor_target": self.actor_target.state_dict(), - "actor_optim": self.actor_optimizer.state_dict(), - "critic_1": self.critic_1.state_dict(), - "critic_2": self.critic_2.state_dict(), + "actor_optim": self.actor_optim.state_dict(), + "critic1": self.critic1.state_dict(), + "critic2": self.critic2.state_dict(), "critic_target1": self.critic_target1.state_dict(), "critic_target2": self.critic_target2.state_dict(), - "critic_optim": self.critic_optimizer.state_dict(), + "critic_optim": self.critic_optim.state_dict(), } Agent.save_params(self, params, n_episode) def write_log( - self, i: int, loss: np.ndarray, score: float = 0.0, delayed_update: int = 1 + self, i: int, loss: np.ndarray, score: float = 0.0, policy_update_freq: int = 1 ): """Write log about loss and score""" total_loss = loss.sum() print( - "[INFO] episode %d total score: %d, total_step: %d, total loss: %f\n" - "actor_loss: %.3f critic_1_loss: %.3f critic_2_loss: %.3f\n" + "[INFO] episode %d total score: %d, episode_step: %d, total_step: %d\n" + "total loss: %f actor_loss: %.3f critic1_loss: %.3f critic2_loss: %.3f\n" % ( i, score, - self.total_step, + self.episode_steps, + self.total_steps, total_loss, - loss[0] * delayed_update, # actor loss + loss[0] * policy_update_freq, # actor loss loss[1], # critic1 loss loss[2], # critic2 loss ) @@ -251,9 +246,9 @@ def write_log( { "score": score, "total loss": total_loss, - "actor loss": loss[0] * delayed_update, - "critic_1 loss": loss[1], - "critic_2 loss": loss[2], + "actor loss": loss[0] * policy_update_freq, + "critic1 loss": loss[1], + "critic2 loss": loss[2], } ) @@ -263,14 +258,14 @@ def train(self): if self.args.log: wandb.init() wandb.config.update(self.hyper_params) - # wandb.watch([self.actor, self.critic_1, self.critic_2], log="parameters") + # wandb.watch([self.actor, self.critic1, self.critic2], log="parameters") for self.i_episode in range(1, self.args.episode_num + 1): state = self.env.reset() done = False score = 0 loss_episode = list() - self.episode_step = 0 + self.episode_steps = 0 while not done: if self.args.render and self.i_episode >= self.args.render_after: @@ -282,9 +277,7 @@ def train(self): state = next_state score += reward - # training - if len(self.memory) >= self.hyper_params["BATCH_SIZE"]: - for _ in range(self.hyper_params["EPOCH"]): + if len(self.memory) >= self.hyper_params["BATCH_SIZE"]: experiences = self.memory.sample() loss = self.update_model(experiences) loss_episode.append(loss) # for logging @@ -293,7 +286,10 @@ def train(self): if loss_episode: avg_loss = np.vstack(loss_episode).mean(axis=0) self.write_log( - self.i_episode, avg_loss, score, self.hyper_params["DELAYED_UPDATE"] + self.i_episode, + avg_loss, + score, + self.hyper_params["POLICY_UPDATE_FREQ"], ) if self.i_episode % 
self.args.save_period == 0: self.save_params(self.i_episode) diff --git a/examples/lunarlander_continuous_v2/bc-sac.py b/examples/lunarlander_continuous_v2/bc-sac.py index e3258b58..6f9d0748 100644 --- a/examples/lunarlander_continuous_v2/bc-sac.py +++ b/examples/lunarlander_continuous_v2/bc-sac.py @@ -31,7 +31,7 @@ "LR_QF1": 3e-4, "LR_QF2": 3e-4, "LR_ENTROPY": 3e-4, - "DELAYED_UPDATE": 2, + "POLICY_UPDATE_FREQ": 2, "BUFFER_SIZE": int(1e6), "BATCH_SIZE": 512, "DEMO_BATCH_SIZE": 64, diff --git a/examples/lunarlander_continuous_v2/per-ddpg.py b/examples/lunarlander_continuous_v2/per-ddpg.py index 33769669..4a7c0564 100644 --- a/examples/lunarlander_continuous_v2/per-ddpg.py +++ b/examples/lunarlander_continuous_v2/per-ddpg.py @@ -13,7 +13,7 @@ from algorithms.common.networks.mlp import MLP from algorithms.common.noise import OUNoise -from algorithms.per.ddpg_agent import DDPGPERAgent +from algorithms.per.ddpg_agent import PERDDPGAgent device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -104,7 +104,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) optims = (actor_optim, critic_optim) # create an agent - agent = DDPGPERAgent(env, args, hyper_params, models, optims, noise) + agent = PERDDPGAgent(env, args, hyper_params, models, optims, noise) # run if args.test: diff --git a/examples/lunarlander_continuous_v2/sac.py b/examples/lunarlander_continuous_v2/sac.py index 344d6c9a..6654cc10 100644 --- a/examples/lunarlander_continuous_v2/sac.py +++ b/examples/lunarlander_continuous_v2/sac.py @@ -22,20 +22,20 @@ "GAMMA": 0.99, "TAU": 5e-3, "W_ENTROPY": 1e-3, - "W_MEAN_REG": 1e-3, - "W_STD_REG": 1e-3, + "W_MEAN_REG": 0.0, + "W_STD_REG": 0.0, "W_PRE_ACTIVATION_REG": 0.0, "LR_ACTOR": 3e-4, "LR_VF": 3e-4, "LR_QF1": 3e-4, "LR_QF2": 3e-4, "LR_ENTROPY": 3e-4, - "DELAYED_UPDATE": 2, + "POLICY_UPDATE_FREQ": 2, "BUFFER_SIZE": int(1e6), - "BATCH_SIZE": 512, + "BATCH_SIZE": 128, "AUTO_ENTROPY_TUNING": True, "WEIGHT_DECAY": 0.0, - "INITIAL_RANDOM_ACTION": 5000, + "INITIAL_RANDOM_ACTION": int(1e4), "MULTIPLE_LEARN": 1, } diff --git a/examples/lunarlander_continuous_v2/sacfd.py b/examples/lunarlander_continuous_v2/sacfd.py index 665c9407..b2209d86 100644 --- a/examples/lunarlander_continuous_v2/sacfd.py +++ b/examples/lunarlander_continuous_v2/sacfd.py @@ -34,7 +34,7 @@ "W_MEAN_REG": 1e-3, "W_STD_REG": 1e-3, "W_PRE_ACTIVATION_REG": 0.0, - "DELAYED_UPDATE": 2, + "POLICY_UPDATE_FREQ": 2, "PRETRAIN_STEP": 100, "MULTIPLE_LEARN": 2, # multiple learning updates "LAMBDA1": 1.0, # N-step return weight diff --git a/examples/lunarlander_continuous_v2/td3.py b/examples/lunarlander_continuous_v2/td3.py index b73a05c5..a0ccf39e 100644 --- a/examples/lunarlander_continuous_v2/td3.py +++ b/examples/lunarlander_continuous_v2/td3.py @@ -11,7 +11,7 @@ import torch import torch.optim as optim -from algorithms.common.networks.mlp import MLP +from algorithms.common.networks.mlp import MLP, FlattenMLP from algorithms.common.noise import GaussianNoise from algorithms.td3.agent import TD3Agent @@ -21,19 +21,15 @@ hyper_params = { "GAMMA": 0.99, "TAU": 5e-3, - "TARGET_SMOOTHING_NOISE_STD": 0.2, - "TARGET_SMOOTHING_NOISE_CLIP": 0.5, - "DELAYED_UPDATE": 2, - "BUFFER_SIZE": int(1e5), - "BATCH_SIZE": 128, + "BUFFER_SIZE": int(1e6), + "BATCH_SIZE": 100, "LR_ACTOR": 1e-3, - "LR_CRITIC_1": 1e-3, - "LR_CRITIC_2": 1e-3, - "GAUSSIAN_NOISE_MIN_SIGMA": 0.1, - "GAUSSIAN_NOISE_MAX_SIGMA": 0.1, - "GAUSSIAN_NOISE_DECAY_PERIOD": 1000000, - "WEIGHT_DECAY": 1e-6, - "EPOCH": 256, + "LR_CRITIC": 1e-3, + 
"WEIGHT_DECAY": 0.0, + "POLICY_UPDATE_FREQ": 2, + "EXPLORATION_NOISE": 0.1, + "TARGET_POLICY_NOISE": 0.2, + "TARGET_POLICY_NOISE_CLIP": 0.5, "INITIAL_RANDOM_ACTION": int(1e4), } @@ -58,6 +54,7 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) hidden_sizes=hidden_sizes_actor, output_activation=torch.tanh, ).to(device) + actor_target = MLP( input_size=state_dim, output_size=action_dim, @@ -67,28 +64,35 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) actor_target.load_state_dict(actor.state_dict()) # create critic - critic_1 = MLP( + critic1 = FlattenMLP( input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_critic, ).to(device) - critic_2 = MLP( + + critic2 = FlattenMLP( input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_critic, ).to(device) - critic_target1 = MLP( + + critic_target1 = FlattenMLP( input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_critic, ).to(device) - critic_target2 = MLP( + + critic_target2 = FlattenMLP( input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_critic, ).to(device) - critic_target1.load_state_dict(critic_1.state_dict()) - critic_target2.load_state_dict(critic_2.state_dict()) + + critic_target1.load_state_dict(critic1.state_dict()) + critic_target2.load_state_dict(critic2.state_dict()) + + # concat critic parameters to use one optim + critic_parameters = list(critic1.parameters()) + list(critic2.parameters()) # create optimizers actor_optim = optim.Adam( @@ -96,26 +100,32 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) lr=hyper_params["LR_ACTOR"], weight_decay=hyper_params["WEIGHT_DECAY"], ) - critic_parameter = list(critic_1.parameters()) + list(critic_2.parameters()) + critic_optim = optim.Adam( - critic_parameter, - lr=hyper_params["LR_CRITIC_1"], + critic_parameters, + lr=hyper_params["LR_CRITIC"], weight_decay=hyper_params["WEIGHT_DECAY"], ) # noise instance to make randomness of action - noise = GaussianNoise( - hyper_params["GAUSSIAN_NOISE_MIN_SIGMA"], - hyper_params["GAUSSIAN_NOISE_MAX_SIGMA"], - hyper_params["GAUSSIAN_NOISE_DECAY_PERIOD"], + exploration_noise = GaussianNoise( + action_dim, hyper_params["EXPLORATION_NOISE"], hyper_params["EXPLORATION_NOISE"] + ) + + target_policy_noise = GaussianNoise( + action_dim, + hyper_params["TARGET_POLICY_NOISE"], + hyper_params["TARGET_POLICY_NOISE"], ) # make tuples to create an agent - models = (actor, actor_target, critic_1, critic_2, critic_target1, critic_target2) + models = (actor, actor_target, critic1, critic2, critic_target1, critic_target2) optims = (actor_optim, critic_optim) # create an agent - agent = TD3Agent(env, args, hyper_params, models, optims, noise) + agent = TD3Agent( + env, args, hyper_params, models, optims, exploration_noise, target_policy_noise + ) # run if args.test: diff --git a/examples/lunarlander_v2/dqfd.py b/examples/lunarlander_v2/dqfd.py index 3b373c96..91500585 100644 --- a/examples/lunarlander_v2/dqfd.py +++ b/examples/lunarlander_v2/dqfd.py @@ -9,10 +9,14 @@ import gym import torch +import torch.nn as nn import torch.optim as optim +from algorithms.common.helper_functions import identity +from algorithms.common.networks.mlp import init_layer_uniform +from algorithms.dqn.linear import NoisyLinearConstructor from algorithms.dqn.networks import C51DuelingMLP -from algorithms.fd.dqn_agent import DQNfDAgent +from algorithms.fd.dqn_agent import DQfDAgent device = torch.device("cuda:0" 
if torch.cuda.is_available() else "cpu") @@ -47,6 +51,9 @@ "V_MIN": -300, "V_MAX": 300, "ATOMS": 1530, + # NoisyNet + "USE_NOISY_NET": False, + "STD_INIT": 0.5, } @@ -63,6 +70,17 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) # create model def get_fc_model(): hidden_sizes = [128, 64] + + # use noisy net + if hyper_params["USE_NOISY_NET"]: + linear_layer = NoisyLinearConstructor(hyper_params["STD_INIT"]) + init_fn = identity + hyper_params["MAX_EPSILON"] = 0.0 + hyper_params["MIN_EPSILON"] = 0.0 + else: + linear_layer = nn.Linear + init_fn = init_layer_uniform + model = C51DuelingMLP( input_size=state_dim, action_size=action_dim, @@ -70,7 +88,10 @@ def get_fc_model(): v_min=hyper_params["V_MIN"], v_max=hyper_params["V_MAX"], atom_size=hyper_params["ATOMS"], + linear_layer=linear_layer, + init_fn=init_fn, ).to(device) + return model dqn = get_fc_model() @@ -89,7 +110,7 @@ def get_fc_model(): models = (dqn, dqn_target) # create an agent - agent = DQNfDAgent(env, args, hyper_params, models, dqn_optim) + agent = DQfDAgent(env, args, hyper_params, models, dqn_optim) # run if args.test: diff --git a/examples/lunarlander_v2/dqn.py b/examples/lunarlander_v2/dqn.py index c824dfaf..435dd505 100644 --- a/examples/lunarlander_v2/dqn.py +++ b/examples/lunarlander_v2/dqn.py @@ -9,9 +9,13 @@ import gym import torch +import torch.nn as nn import torch.optim as optim +from algorithms.common.helper_functions import identity +from algorithms.common.networks.mlp import init_layer_uniform from algorithms.dqn.agent import DQNAgent +from algorithms.dqn.linear import NoisyLinearConstructor from algorithms.dqn.networks import C51DuelingMLP device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -43,6 +47,9 @@ "V_MIN": -300, "V_MAX": 300, "ATOMS": 1530, + # NoisyNet + "USE_NOISY_NET": True, + "STD_INIT": 0.5, } @@ -59,6 +66,17 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) # create model def get_fc_model(): hidden_sizes = [128, 64] + + if hyper_params["USE_NOISY_NET"]: + # use noisy net + linear_layer = NoisyLinearConstructor(hyper_params["STD_INIT"]) + init_fn = identity + hyper_params["MAX_EPSILON"] = 0.0 + hyper_params["MIN_EPSILON"] = 0.0 + else: + linear_layer = nn.Linear + init_fn = init_layer_uniform + model = C51DuelingMLP( input_size=state_dim, action_size=action_dim, @@ -66,6 +84,8 @@ def get_fc_model(): v_min=hyper_params["V_MIN"], v_max=hyper_params["V_MAX"], atom_size=hyper_params["ATOMS"], + linear_layer=linear_layer, + init_fn=init_fn, ).to(device) return model diff --git a/examples/pong_no_frameskip_v4/dqn.py b/examples/pong_no_frameskip_v4/dqn.py index 074b987e..dfe8bd6a 100644 --- a/examples/pong_no_frameskip_v4/dqn.py +++ b/examples/pong_no_frameskip_v4/dqn.py @@ -9,10 +9,14 @@ import gym import torch +import torch.nn as nn import torch.optim as optim +from algorithms.common.helper_functions import identity from algorithms.common.networks.cnn import CNNLayer +from algorithms.common.networks.mlp import init_layer_uniform from algorithms.dqn.agent import DQNAgent +from algorithms.dqn.linear import NoisyLinearConstructor from algorithms.dqn.networks import IQNCNN, IQNMLP device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") @@ -46,6 +50,9 @@ "N_QUANTILE_SAMPLES": 32, "QUANTILE_EMBEDDING_DIM": 64, "KAPPA": 1.0, + # NoisyNet + "USE_NOISY_NET": True, + "STD_INIT": 0.5, } @@ -64,12 +71,24 @@ def get_cnn_model(): hidden_sizes = [512] action_dim = env.action_space.n + # use noisy net + if 
hyper_params["USE_NOISY_NET"]: + linear_layer = NoisyLinearConstructor(hyper_params["STD_INIT"]) + init_fn = identity + hyper_params["MAX_EPSILON"] = 0.0 + hyper_params["MIN_EPSILON"] = 0.0 + else: + linear_layer = nn.Linear + init_fn = init_layer_uniform + fc_model = IQNMLP( input_size=fc_input_size, output_size=action_dim, hidden_sizes=hidden_sizes, n_quantiles=hyper_params["N_QUANTILE_SAMPLES"], quantile_embedding_dim=hyper_params["QUANTILE_EMBEDDING_DIM"], + linear_layer=linear_layer, + init_fn=init_fn, ).to(device) # create a model diff --git a/examples/reacher_v2/bc-sac.py b/examples/reacher_v2/bc-sac.py index 5773eb95..94f9b01e 100644 --- a/examples/reacher_v2/bc-sac.py +++ b/examples/reacher_v2/bc-sac.py @@ -31,7 +31,7 @@ "LR_QF1": 3e-4, "LR_QF2": 3e-4, "LR_ENTROPY": 3e-4, - "DELAYED_UPDATE": 2, + "POLICY_UPDATE_FREQ": 2, "BUFFER_SIZE": int(1e6), "BATCH_SIZE": 512, "DEMO_BATCH_SIZE": 64, diff --git a/examples/reacher_v2/sac.py b/examples/reacher_v2/sac.py index 3f2607d2..3cab21b1 100644 --- a/examples/reacher_v2/sac.py +++ b/examples/reacher_v2/sac.py @@ -30,7 +30,7 @@ "LR_QF1": 3e-4, "LR_QF2": 3e-4, "LR_ENTROPY": 3e-4, - "DELAYED_UPDATE": 2, + "POLICY_UPDATE_FREQ": 2, "BUFFER_SIZE": int(1e6), "BATCH_SIZE": 512, "AUTO_ENTROPY_TUNING": True, diff --git a/examples/reacher_v2/td3.py b/examples/reacher_v2/td3.py index c488afad..6854f293 100644 --- a/examples/reacher_v2/td3.py +++ b/examples/reacher_v2/td3.py @@ -11,7 +11,7 @@ import torch import torch.optim as optim -from algorithms.common.networks.mlp import MLP +from algorithms.common.networks.mlp import MLP, FlattenMLP from algorithms.common.noise import GaussianNoise from algorithms.td3.agent import TD3Agent @@ -19,21 +19,17 @@ # hyper parameters hyper_params = { - "GAMMA": 0.99, + "GAMMA": 0.95, "TAU": 5e-3, - "TARGET_SMOOTHING_NOISE_STD": 0.2, - "TARGET_SMOOTHING_NOISE_CLIP": 0.5, - "DELAYED_UPDATE": 2, "BUFFER_SIZE": int(1e6), "BATCH_SIZE": 100, "LR_ACTOR": 1e-3, - "LR_CRITIC_1": 1e-3, - "LR_CRITIC_2": 1e-3, - "GAUSSIAN_NOISE_MIN_SIGMA": 0.1, - "GAUSSIAN_NOISE_MAX_SIGMA": 0.1, - "GAUSSIAN_NOISE_DECAY_PERIOD": 1000000, - "WEIGHT_DECAY": 1e-6, - "EPOCH": 50, + "LR_CRITIC": 1e-3, + "WEIGHT_DECAY": 0.0, + "POLICY_UPDATE_FREQ": 2, + "EXPLORATION_NOISE": 0.1, + "TARGET_POLICY_NOISE": 0.2, + "TARGET_POLICY_NOISE_CLIP": 0.5, "INITIAL_RANDOM_ACTION": int(1e4), } @@ -58,37 +54,46 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) hidden_sizes=hidden_sizes_actor, output_activation=torch.tanh, ).to(device) + actor_target = MLP( input_size=state_dim, output_size=action_dim, hidden_sizes=hidden_sizes_actor, output_activation=torch.tanh, ).to(device) + actor_target.load_state_dict(actor.state_dict()) # create critic - critic_1 = MLP( + critic1 = FlattenMLP( input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_critic, ).to(device) - critic_2 = MLP( + + critic2 = FlattenMLP( input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_critic, ).to(device) - critic_target1 = MLP( + + critic_target1 = FlattenMLP( input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_critic, ).to(device) - critic_target2 = MLP( + + critic_target2 = FlattenMLP( input_size=state_dim + action_dim, output_size=1, hidden_sizes=hidden_sizes_critic, ).to(device) - critic_target1.load_state_dict(critic_1.state_dict()) - critic_target2.load_state_dict(critic_2.state_dict()) + + critic_target1.load_state_dict(critic1.state_dict()) + 
critic_target2.load_state_dict(critic2.state_dict()) + + # concat critic parameters to use one optim + critic_parameters = list(critic1.parameters()) + list(critic2.parameters()) # create optimizers actor_optim = optim.Adam( @@ -96,26 +101,32 @@ def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int) lr=hyper_params["LR_ACTOR"], weight_decay=hyper_params["WEIGHT_DECAY"], ) - critic_parameter = list(critic_1.parameters()) + list(critic_2.parameters()) + critic_optim = optim.Adam( - critic_parameter, - lr=hyper_params["LR_CRITIC_1"], + critic_parameters, + lr=hyper_params["LR_CRITIC"], weight_decay=hyper_params["WEIGHT_DECAY"], ) # noise instance to make randomness of action - noise = GaussianNoise( - hyper_params["GAUSSIAN_NOISE_MIN_SIGMA"], - hyper_params["GAUSSIAN_NOISE_MAX_SIGMA"], - hyper_params["GAUSSIAN_NOISE_DECAY_PERIOD"], + exploration_noise = GaussianNoise( + action_dim, hyper_params["EXPLORATION_NOISE"], hyper_params["EXPLORATION_NOISE"] + ) + + target_policy_noise = GaussianNoise( + action_dim, + hyper_params["TARGET_POLICY_NOISE"], + hyper_params["TARGET_POLICY_NOISE"], ) # make tuples to create an agent - models = (actor, actor_target, critic_1, critic_2, critic_target1, critic_target2) + models = (actor, actor_target, critic1, critic2, critic_target1, critic_target2) optims = (actor_optim, critic_optim) # create an agent - agent = TD3Agent(env, args, hyper_params, models, optims, noise) + agent = TD3Agent( + env, args, hyper_params, models, optims, exploration_noise, target_policy_noise + ) # run if args.test:
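
Reviewer note (not part of the patch): below is a minimal usage sketch of the new `NoisyLinear` / `NoisyLinearConstructor` added in `algorithms/dqn/linear.py`, showing how the example configs swap it in for `nn.Linear`. It assumes only that this repository is on `PYTHONPATH` and PyTorch is installed; it illustrates the factorized-noise layer exactly as defined in this diff, and is not an official example script.

```python
import torch

from algorithms.dqn.linear import NoisyLinear, NoisyLinearConstructor

# A single noisy layer: effective weights are mu + sigma * epsilon, where
# epsilon is factorized Gaussian noise (outer product of per-input and
# per-output noise vectors), as built in NoisyLinear.reset_noise().
layer = NoisyLinear(in_features=4, out_features=2, std_init=0.5)
x = torch.randn(1, 4)

out_before = layer(x)
layer.reset_noise()           # re-sample epsilon
out_after = layer(x)          # same input, different (noisy) output
print(out_before, out_after)

# The constructor is what the example scripts pass as `linear_layer`
# (together with init_fn=identity), so MLP-based networks such as
# C51DuelingMLP or IQNMLP build NoisyLinear layers instead of nn.Linear
# and can re-sample noise after each update via NoisyMLPHandler.reset_noise().
make_noisy = NoisyLinearConstructor(std_init=0.5)
hidden = make_noisy(4, 16)    # equivalent to NoisyLinear(4, 16, std_init=0.5)
print(type(hidden).__name__)  # NoisyLinear
```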