Skip to content

Commit

Permalink
style(nyz): polish model template comments (#722)
Browse files Browse the repository at this point in the history
* style(nyz): polish model template comments

* style(nyz): polish model template vac comments

* fix(nyz): fix vac bug

* style(nyz): polish model template qac comments

* fix(nyz): fix qac bug

* fix(nyz): fix discrete sac bug

* refactor(nyz): refactor model wrapper api doc

* fix(nyz): fix unittest bugs
  • Loading branch information
PaParaZz1 authored Sep 13, 2023
1 parent d24b8f9 commit a07cde2
Show file tree
Hide file tree
Showing 30 changed files with 777 additions and 758 deletions.
9 changes: 2 additions & 7 deletions ding/entry/tests/test_serial_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,15 +284,10 @@ def test_sac_log_space():
assert False, "pipeline fail"


auto_alpha = [True, False]
log_space = [True, False]
args = [item for item in product(*[auto_alpha, log_space])]


@pytest.mark.platformtest
@pytest.mark.unittest
@pytest.mark.parametrize('auto_alpha, log_space', args)
def test_discrete_sac(auto_alpha, log_space):
def test_discrete_sac():
auto_alpha, log_space = True, False
config = [deepcopy(cartpole_sac_config), deepcopy(cartpole_sac_create_config)]
config[0].policy.learn.update_per_collect = 1
config[0].policy.learn.auto_alpha = auto_alpha
Expand Down
6 changes: 3 additions & 3 deletions ding/framework/tests/test_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def uncaught_exception_main():
time.sleep(0.2)


@pytest.mark.unittest
@pytest.mark.tmp
def test_uncaught_exception():
# Make one process crash, then the parent process will also crash and output the stack of the wrong process.
with pytest.raises(Exception) as exc_info:
Expand Down Expand Up @@ -70,7 +70,7 @@ def disconnected_main():
assert i == 9


@pytest.mark.unittest
@pytest.mark.tmp
def test_disconnected():
# Make one process exit normally and the rest will still run, even if the network request
# is not received by other processes.
Expand Down Expand Up @@ -141,7 +141,7 @@ def main(cls):
raise Exception("Invalid node id")


@pytest.mark.unittest
@pytest.mark.tmp
def test_auto_recover():
# With max_retries=1
Parallel.runner(
Expand Down
8 changes: 4 additions & 4 deletions ding/framework/tests/test_supervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def sleep1(self):
sleep(1)


@pytest.mark.unittest
@pytest.mark.tmp
@pytest.mark.parametrize("type_", [ChildType.PROCESS, ChildType.THREAD])
def test_supervisor(type_):
sv = Supervisor(type_=type_)
Expand Down Expand Up @@ -74,7 +74,7 @@ def test_supervisor(type_):
sv.shutdown()


@pytest.mark.unittest
@pytest.mark.tmp
def test_supervisor_spawn():
sv = Supervisor(type_=ChildType.PROCESS, mp_ctx=mp.get_context("spawn"))
for _ in range(3):
Expand Down Expand Up @@ -103,7 +103,7 @@ def step(self, _):
return self._counter


# @pytest.mark.unittest
@pytest.mark.tmp
@pytest.mark.parametrize("type_", [ChildType.PROCESS, ChildType.THREAD])
def test_crash_supervisor(type_):
sv = Supervisor(type_=type_)
Expand Down Expand Up @@ -143,7 +143,7 @@ def test_crash_supervisor(type_):
sv.shutdown()


@pytest.mark.unittest
@pytest.mark.tmp
@pytest.mark.parametrize("type_", [ChildType.PROCESS, ChildType.THREAD])
def test_recv_all(type_):
sv = Supervisor(type_=type_)
Expand Down
16 changes: 8 additions & 8 deletions ding/framework/tests/test_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,12 @@ def _counter(ctx):
assert sync_count > 0


@pytest.mark.unittest
@pytest.mark.tmp
def test_parallel_pipeline():
Parallel.runner(n_parallel_workers=2, startup_interval=0.1)(parallel_main)


@pytest.mark.unittest
@pytest.mark.tmp
def test_emit():
with task.start():
greets = []
Expand Down Expand Up @@ -161,12 +161,12 @@ def emit_remote_main():
assert len(greets) == 0


@pytest.mark.unittest
@pytest.mark.tmp
def test_emit_remote():
Parallel.runner(n_parallel_workers=2, startup_interval=0.1)(emit_remote_main)


@pytest.mark.unittest
@pytest.mark.tmp
def test_wait_for():
# Wait for will only work in async or parallel mode
with task.start(async_mode=True, n_async_workers=2):
Expand Down Expand Up @@ -198,7 +198,7 @@ def step1(_):
task.run(max_step=1)


@pytest.mark.unittest
@pytest.mark.tmp
def test_async_exception():
with task.start(async_mode=True, n_async_workers=2):

Expand Down Expand Up @@ -227,12 +227,12 @@ def early_stop_main():
assert task.ctx.total_step < 7


@pytest.mark.unittest
@pytest.mark.tmp
def test_early_stop():
Parallel.runner(n_parallel_workers=2, startup_interval=0.1)(early_stop_main)


@pytest.mark.unittest
@pytest.mark.tmp
def test_parallel_in_sequencial():
result = []

Expand All @@ -250,7 +250,7 @@ def slow(_):
assert result == ["begin", "fast", "slow"]


@pytest.mark.unittest
@pytest.mark.tmp
def test_serial_in_parallel():
result = []

Expand Down
2 changes: 1 addition & 1 deletion ding/model/common/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def prod(iterable):
class ConvEncoder(nn.Module):
"""
Overview:
The ``Convolution Encoder`` used to encode raw 2-dim image observations (e.g. Atari/Procgen).
The ``Convolution Encoder`` used to encode raw 2-dim image observations (e.g. Atari/Procgen).
Interfaces:
``__init__``, ``forward``.
"""
Expand Down
4 changes: 2 additions & 2 deletions ding/model/common/head.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def forward(self, x: torch.Tensor) -> Dict:
class RainbowHead(nn.Module):
"""
Overview:
The ``RainbowHead`` used to output Q-value distribution, which is used in Rainbow DQN.
The ``RainbowHead`` used to output Q-value distribution, which is used in Rainbow DQN.
Interfaces:
``__init__``, ``forward``.
"""
Expand Down Expand Up @@ -394,7 +394,7 @@ def forward(self, x: torch.Tensor) -> Dict:
class QRDQNHead(nn.Module):
"""
Overview:
The ``QRDQNHead`` (Quantile Regression DQN) used to output action quantiles.
The ``QRDQNHead`` (Quantile Regression DQN) used to output action quantiles.
Interfaces:
``__init__``, ``forward``.
"""
Expand Down
1 change: 1 addition & 0 deletions ding/model/common/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import torch
from easydict import EasyDict
from ding.utils import import_module, MODEL_REGISTRY
Expand Down
10 changes: 5 additions & 5 deletions ding/model/template/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# general
from .q_learning import DQN, RainbowDQN, QRDQN, IQN, FQF, DRQN, C51DQN, BDQ
from .qac import QAC, DiscreteQAC
from .q_learning import DQN, RainbowDQN, QRDQN, IQN, FQF, DRQN, C51DQN, BDQ, GTrXLDQN
from .qac import DiscreteQAC, ContinuousQAC
from .pdqn import PDQN
from .vac import VAC, DREAMERVAC
from .bc import DiscreteBC, ContinuousBC
from .pg import PG
from .language_transformer import LanguageTransformer
# algorithm-specific
from .pg import PG
from .ppg import PPG
from .qmix import Mixer, QMix
from .collaq import CollaQ
Expand All @@ -19,10 +19,10 @@
from .mavac import MAVAC
from .ngu import NGU
from .qac_dist import QACDIST
from .maqac import MAQAC, ContinuousMAQAC
from .maqac import DiscreteMAQAC, ContinuousMAQAC
from .madqn import MADQN
from .vae import VanillaVAE
from .dt import DecisionTransformer
from .procedure_cloning import ProcedureCloningMCTS, ProcedureCloningBFS
from .bcq import BCQ
from .edac import QACEnsemble
from .edac import EDAC
8 changes: 4 additions & 4 deletions ding/model/template/bc.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@ def forward(self, inputs: Union[torch.Tensor, Dict[str, torch.Tensor]]) -> Dict[
"""
Overview:
The unique execution (forward) method of ContinuousBC method.
Arguments:
- inputs (:obj:`torch.Tensor`): Observation data, defaults to tensor.
Returns:
- output (:obj:`Dict`): Output dict data, including differnet key-values among distinct action_space.
Arguments:
- inputs (:obj:`torch.Tensor`): Observation data, defaults to tensor.
Returns:
- output (:obj:`Dict`): Output dict data, including different key-values among distinct action_space.
"""
if self.action_space == 'regression':
x = self.actor(inputs)
Expand Down
11 changes: 6 additions & 5 deletions ding/model/template/edac.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@


@MODEL_REGISTRY.register('edac')
class QACEnsemble(nn.Module):
r"""
class EDAC(nn.Module):
"""
Overview:
The QAC network with ensemble, which is used in EDAC.
The Q-value Actor-Critic network with the ensemble mechanism, which is used in EDAC.
Interfaces:
``__init__``, ``forward``, ``compute_actor``, ``compute_critic``
"""
Expand Down Expand Up @@ -51,7 +51,7 @@ def __init__(
- norm_type (:obj:`Optional[str]`): The type of normalization to use after network layer (FC, Conv), \
see ``ding.torch_utils.network`` for more details.
"""
super(QACEnsemble, self).__init__()
super(EDAC, self).__init__()
obs_shape: int = squeeze(obs_shape)
action_shape = squeeze(action_shape)
self.action_shape = action_shape
Expand Down Expand Up @@ -94,6 +94,7 @@ def forward(self, inputs: Union[torch.Tensor, Dict[str, torch.Tensor]], mode: st
- inputs (:obj:`Dict`): Input dict data, including obs and action tensor.
Returns:
- output (:obj:`Dict`): Output dict data, including q_value tensor.
.. note::
For specific examples, one can refer to API doc of ``compute_actor`` and ``compute_critic`` respectively.
"""
Expand Down Expand Up @@ -125,7 +126,7 @@ def compute_actor(self, obs: torch.Tensor) -> Dict[str, Union[torch.Tensor, Dict
- action_args (:obj:`torch.Tensor`): :math:`(B, N3)`, B is batch size and N3 corresponds to \
``action_shape.action_args_shape``.
Examples:
>>> model = QACEnsemble(64, 64,)
>>> model = EDAC(64, 64,)
>>> obs = torch.randn(4, 64)
>>> actor_outputs = model(obs,'compute_actor')
>>> assert actor_outputs['logit'][0].shape == torch.Size([4, 64]) # mu
Expand Down
58 changes: 28 additions & 30 deletions ding/model/template/maqac.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
FCEncoder, ConvEncoder


@MODEL_REGISTRY.register('maqac')
class MAQAC(nn.Module):
r"""
@MODEL_REGISTRY.register('discrete_maqac')
class DiscreteMAQAC(nn.Module):
"""
Overview:
The MAQAC model.
The discrete action Multi-Agent Q-value Actor-CritiC (MAQAC) model.
Interfaces:
``__init__``, ``forward``, ``compute_actor``, ``compute_critic``
"""
Expand All @@ -32,28 +32,27 @@ def __init__(
activation: Optional[nn.Module] = nn.ReLU(),
norm_type: Optional[str] = None,
) -> None:
r"""
"""
Overview:
Init the MAQAC Model according to arguments.
Init the DiscreteMAQAC Model according to arguments.
Arguments:
- agent_obs_shape (:obj:`Union[int, SequenceType]`): Agent's observation's space.
- global_obs_shape (:obj:`Union[int, SequenceType]`): Global observation's space.
- obs_shape (:obj:`Union[int, SequenceType]`): Observation's space.
- action_shape (:obj:`Union[int, SequenceType]`): Action's space.
- twin_critic (:obj:`bool`): Whether include twin critic.
- actor_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to actor-nn's ``Head``.
- actor_head_layer_num (:obj:`int`):
The num of layers used in the network to compute Q value output for actor's nn.
- actor_head_layer_num (:obj:`int`): The num of layers used in the network to compute Q value output \
for actor's nn.
- critic_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to critic-nn's ``Head``.
- critic_head_layer_num (:obj:`int`):
The num of layers used in the network to compute Q value output for critic's nn.
- activation (:obj:`Optional[nn.Module]`):
The type of activation function to use in ``MLP`` the after ``layer_fn``,
if ``None`` then default set to ``nn.ReLU()``
- norm_type (:obj:`Optional[str]`):
The type of normalization to use, see ``ding.torch_utils.fc_block`` for more details.
- critic_head_layer_num (:obj:`int`): The num of layers used in the network to compute Q value output \
for critic's nn.
- activation (:obj:`Optional[nn.Module]`): The type of activation function to use in ``MLP`` after the \
``layer_fn``, if ``None`` then default set to ``nn.ReLU()``
- norm_type (:obj:`Optional[str]`): The type of normalization to use, see ``ding.torch_utils.fc_block`` \
for more details.
"""
super(MAQAC, self).__init__()
super(DiscreteMAQAC, self).__init__()
agent_obs_shape: int = squeeze(agent_obs_shape)
action_shape: int = squeeze(action_shape)
self.actor = nn.Sequential(
Expand Down Expand Up @@ -188,11 +187,11 @@ def compute_critic(self, inputs: Dict) -> Dict:
return {'q_value': x}


@MODEL_REGISTRY.register('maqac_continuous')
@MODEL_REGISTRY.register('continuous_maqac')
class ContinuousMAQAC(nn.Module):
r"""
"""
Overview:
The Continuous MAQAC model.
The continuous action Multi-Agent Q-value Actor-CritiC (MAQAC) model.
Interfaces:
``__init__``, ``forward``, ``compute_actor``, ``compute_critic``
"""
Expand All @@ -212,7 +211,7 @@ def __init__(
activation: Optional[nn.Module] = nn.ReLU(),
norm_type: Optional[str] = None,
) -> None:
r"""
"""
Overview:
Init the ContinuousMAQAC Model according to arguments.
Arguments:
Expand All @@ -221,24 +220,23 @@ def __init__(
- action_space (:obj:`str`): Whether choose ``regression`` or ``reparameterization``.
- twin_critic (:obj:`bool`): Whether include twin critic.
- actor_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to actor-nn's ``Head``.
- actor_head_layer_num (:obj:`int`):
The num of layers used in the network to compute Q value output for actor's nn.
- actor_head_layer_num (:obj:`int`): The num of layers used in the network to compute Q value output \
for actor's nn.
- critic_head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` to pass to critic-nn's ``Head``.
- critic_head_layer_num (:obj:`int`):
The num of layers used in the network to compute Q value output for critic's nn.
- activation (:obj:`Optional[nn.Module]`):
The type of activation function to use in ``MLP`` the after ``layer_fn``,
if ``None`` then default set to ``nn.ReLU()``
- norm_type (:obj:`Optional[str]`):
The type of normalization to use, see ``ding.torch_utils.fc_block`` for more details.
- critic_head_layer_num (:obj:`int`): The num of layers used in the network to compute Q value output \
for critic's nn.
- activation (:obj:`Optional[nn.Module]`): The type of activation function to use in ``MLP`` after the \
``layer_fn``, if ``None`` then default set to ``nn.ReLU()``
- norm_type (:obj:`Optional[str]`): The type of normalization to use, see ``ding.torch_utils.fc_block`` \
for more details.
"""
super(ContinuousMAQAC, self).__init__()
obs_shape: int = squeeze(agent_obs_shape)
global_obs_shape: int = squeeze(global_obs_shape)
action_shape = squeeze(action_shape)
self.action_shape = action_shape
self.action_space = action_space
assert self.action_space in ['regression', 'reparameterization']
assert self.action_space in ['regression', 'reparameterization'], self.action_space
if self.action_space == 'regression': # DDPG, TD3
self.actor = nn.Sequential(
nn.Linear(obs_shape, actor_head_hidden_size), activation,
Expand Down
Loading

0 comments on commit a07cde2

Please sign in to comment.