diff --git a/ding/model/template/bc.py b/ding/model/template/bc.py
index 4568e3ce1c..ce58ca8c5f 100644
--- a/ding/model/template/bc.py
+++ b/ding/model/template/bc.py
@@ -1,9 +1,8 @@
-from typing import Union, Optional, Dict, Callable, List
+from typing import Union, Optional, Dict
 import torch
 import torch.nn as nn
 from easydict import EasyDict
-from ding.torch_utils import get_lstm
 from ding.utils import MODEL_REGISTRY, SequenceType, squeeze
 from ..common import FCEncoder, ConvEncoder, DiscreteHead, DuelingHead, \
     MultiHead, RegressionHead, ReparameterizationHead
@@ -11,6 +10,12 @@
 
 @MODEL_REGISTRY.register('discrete_bc')
 class DiscreteBC(nn.Module):
+    """
+    Overview:
+        The DiscreteBC network.
+    Interfaces:
+        ``__init__``, ``forward``
+    """
 
     def __init__(
             self,
@@ -36,9 +41,11 @@ def __init__(
             - head_hidden_size (:obj:`Optional[int]`): The ``hidden_size`` of head network.
             - head_layer_num (:obj:`int`): The number of layers used in the head network to compute Q value output
             - activation (:obj:`Optional[nn.Module]`): The type of activation function in networks \
-                if ``None`` then default set it to ``nn.ReLU()``
+                if ``None`` then default set it to ``nn.ReLU()``.
             - norm_type (:obj:`Optional[str]`): The type of normalization in networks, see \
                 ``ding.torch_utils.fc_block`` for more details.
+            - strides (:obj:`Optional[list]`): The strides for each convolution layer, such as [2, 2, 2]. The length \
+                of this argument should be the same as ``encoder_hidden_size_list``.
         """
         super(DiscreteBC, self).__init__()
         # For compatibility: 1, (1, ), [4, 32, 32]
@@ -83,7 +90,7 @@ def __init__(
         )
 
     def forward(self, x: torch.Tensor) -> Dict:
-        r"""
+        """
         Overview:
             DiscreteBC forward computation graph, input observation tensor to predict q_value.
         Arguments:
@@ -108,7 +115,7 @@ def forward(self, x: torch.Tensor) -> Dict:
 
 @MODEL_REGISTRY.register('continuous_bc')
 class ContinuousBC(nn.Module):
-    r"""
+    """
     Overview:
         The ContinuousBC network.
     Interfaces:
@@ -127,7 +134,7 @@ def __init__(
     ) -> None:
         """
         Overview:
-            Initailize the ContinuousBC Model according to input arguments.
+            Initialize the ContinuousBC Model according to input arguments.
         Arguments:
             - obs_shape (:obj:`Union[int, SequenceType]`): Observation's shape, such as 128, (156, ).
             - action_shape (:obj:`Union[int, SequenceType, EasyDict]`): Action's shape, such as 4, (3, ), \
@@ -173,14 +180,27 @@ def __init__(
             )
         )
 
-    def forward(self, inputs: Union[torch.Tensor, Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
+    def forward(self, inputs: Union[torch.Tensor, Dict[str, torch.Tensor]]) -> Dict:
         """
         Overview:
-            The unique execution (forward) method of ContinuousBC method.
+            The unique execution (forward) method of ContinuousBC.
         Arguments:
             - inputs (:obj:`torch.Tensor`): Observation data, defaults to tensor.
         Returns:
-            - output (:obj:`Dict`): Output dict data, including differnet key-values among distinct action_space.
+            - output (:obj:`Dict`): Output dict data, including different key-values among distinct action_space.
+
+        Examples (Regression):
+            >>> model = ContinuousBC(32, 6, action_space='regression')
+            >>> inputs = torch.randn(4, 32)
+            >>> outputs = model(inputs)
+            >>> assert isinstance(outputs, dict) and outputs['action'].shape == torch.Size([4, 6])
+
+        Examples (Reparameterization):
+            >>> model = ContinuousBC(32, 6, action_space='reparameterization')
+            >>> inputs = torch.randn(4, 32)
+            >>> outputs = model(inputs)
+            >>> assert isinstance(outputs, dict) and outputs['logit'][0].shape == torch.Size([4, 6])
+            >>> assert outputs['logit'][1].shape == torch.Size([4, 6])
         """
         if self.action_space == 'regression':
             x = self.actor(inputs)
diff --git a/ding/model/template/decision_transformer.py b/ding/model/template/decision_transformer.py
index 73330da25f..0b8f47a71d 100644
--- a/ding/model/template/decision_transformer.py
+++ b/ding/model/template/decision_transformer.py
@@ -13,14 +13,34 @@
 """
 
 import math
+from typing import Union, Optional, Tuple
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from ding.utils import SequenceType
 
 
 class MaskedCausalAttention(nn.Module):
-
-    def __init__(self, h_dim, max_T, n_heads, drop_p):
+    """
+    Overview:
+        The implementation of masked causal attention in decision transformer. The input of this module is a sequence \
+        of several tokens. The hidden embedding calculated for the i-th token only depends on the input tokens at \
+        positions 0 to i, which is achieved by applying a causal mask to the attention map. Thus, this module is \
+        called masked-causal attention.
+    Interfaces:
+        ``__init__``, ``forward``
+    """
+
+    def __init__(self, h_dim: int, max_T: int, n_heads: int, drop_p: float) -> None:
+        """
+        Overview:
+            Initialize the MaskedCausalAttention Model according to input arguments.
+        Arguments:
+            - h_dim (:obj:`int`): The dimension of the hidden layers, such as 128.
+            - max_T (:obj:`int`): The max context length of the attention, such as 6.
+            - n_heads (:obj:`int`): The number of heads in calculating attention, such as 8.
+            - drop_p (:obj:`float`): The drop rate of the drop-out layer, such as 0.1.
+        """
         super().__init__()
 
         self.n_heads = n_heads
@@ -42,7 +62,22 @@ def __init__(self, h_dim, max_T, n_heads, drop_p):
         # during backpropagation
         self.register_buffer('mask', mask)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Overview:
+            MaskedCausalAttention forward computation graph, input a sequence tensor \
+            and return a tensor with the same shape.
+        Arguments:
+            - x (:obj:`torch.Tensor`): The input tensor.
+        Returns:
+            - out (:obj:`torch.Tensor`): Output tensor, the shape is the same as the input.
+
+        Examples:
+            >>> inputs = torch.randn(2, 4, 64)
+            >>> model = MaskedCausalAttention(64, 5, 4, 0.1)
+            >>> outputs = model(inputs)
+            >>> assert outputs.shape == torch.Size([2, 4, 64])
+        """
         B, T, C = x.shape  # batch size, seq length, h_dim * n_heads
         N, D = self.n_heads, C // self.n_heads  # N = num heads, D = attention dim
@@ -70,8 +105,23 @@ def forward(self, x):
 
 
 class Block(nn.Module):
-
-    def __init__(self, h_dim, max_T, n_heads, drop_p):
+    """
+    Overview:
+        The implementation of a transformer block in decision transformer.
+    Interfaces:
+        ``__init__``, ``forward``
+    """
+
+    def __init__(self, h_dim: int, max_T: int, n_heads: int, drop_p: float) -> None:
+        """
+        Overview:
+            Initialize the Block Model according to input arguments.
+        Arguments:
+            - h_dim (:obj:`int`): The dimension of the hidden layers, such as 128.
+            - max_T (:obj:`int`): The max context length of the attention, such as 6.
+            - n_heads (:obj:`int`): The number of heads in calculating attention, such as 8.
+            - drop_p (:obj:`float`): The drop rate of the drop-out layer, such as 0.1.
+        """
         super().__init__()
         self.attention = MaskedCausalAttention(h_dim, max_T, n_heads, drop_p)
         self.mlp = nn.Sequential(
@@ -83,7 +133,22 @@ def __init__(self, h_dim, max_T, n_heads, drop_p):
         self.ln1 = nn.LayerNorm(h_dim)
         self.ln2 = nn.LayerNorm(h_dim)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Overview:
+            Forward computation graph of the decision transformer block, input a sequence tensor \
+            and return a tensor with the same shape.
+        Arguments:
+            - x (:obj:`torch.Tensor`): The input tensor.
+        Returns:
+            - output (:obj:`torch.Tensor`): Output tensor, the shape is the same as the input.
+
+        Examples:
+            >>> inputs = torch.randn(2, 4, 64)
+            >>> model = Block(64, 5, 4, 0.1)
+            >>> outputs = model(inputs)
+            >>> assert outputs.shape == torch.Size([2, 4, 64])
+        """
         # Attention -> LayerNorm -> MLP -> LayerNorm
         x = x + self.attention(x)  # residual
         x = self.ln1(x)
@@ -95,20 +160,42 @@ class DecisionTransformer(nn.Module):
+    """
+    Overview:
+        The implementation of decision transformer.
+    Interfaces:
+        ``__init__``, ``forward``, ``configure_optimizers``
+    """
 
     def __init__(
             self,
-            state_dim,
-            act_dim,
-            n_blocks,
-            h_dim,
-            context_len,
-            n_heads,
-            drop_p,
-            max_timestep=4096,
-            state_encoder=None,
-            continuous=False
+            state_dim: Union[int, SequenceType],
+            act_dim: int,
+            n_blocks: int,
+            h_dim: int,
+            context_len: int,
+            n_heads: int,
+            drop_p: float,
+            max_timestep: int = 4096,
+            state_encoder: Optional[nn.Module] = None,
+            continuous: bool = False
     ):
+        """
+        Overview:
+            Initialize the DecisionTransformer Model according to input arguments.
+        Arguments:
+            - state_dim (:obj:`Union[int, SequenceType]`): The dimension of states, such as 128 or (4, 84, 84).
+            - act_dim (:obj:`int`): The dimension of actions, such as 6.
+            - n_blocks (:obj:`int`): The number of transformer blocks in the decision transformer, such as 3.
+            - h_dim (:obj:`int`): The dimension of the hidden layers, such as 128.
+            - context_len (:obj:`int`): The max context length of the attention, such as 6.
+            - n_heads (:obj:`int`): The number of heads in calculating attention, such as 8.
+            - drop_p (:obj:`float`): The drop rate of the drop-out layer, such as 0.1.
+            - max_timestep (:obj:`int`): The max length of the total sequence, defaults to 4096.
+            - state_encoder (:obj:`Optional[nn.Module]`): The encoder to pre-process the given input. If it is set to \
+                None, the raw state will be pushed into the transformer.
+            - continuous (:obj:`bool`): Whether the action space is continuous, defaults to ``False``.
+        """
         super().__init__()
 
         self.state_dim = state_dim
@@ -152,7 +239,60 @@ def __init__(
             self.embed_action = nn.Sequential(nn.Embedding(act_dim, h_dim), nn.Tanh())
         self.transformer = nn.Sequential(*blocks)
 
-    def forward(self, timesteps, states, actions, returns_to_go, tar=None):
+    def forward(
+            self,
+            timesteps: torch.Tensor,
+            states: torch.Tensor,
+            actions: torch.Tensor,
+            returns_to_go: torch.Tensor,
+            tar: Optional[int] = None
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Overview:
+            Forward computation graph of the decision transformer, input the sequences of timesteps, states, \
+            actions and returns-to-go, and predict the corresponding states, actions and returns-to-go.
+        Arguments:
+            - timesteps (:obj:`torch.Tensor`): The timesteps of the input sequence.
+            - states (:obj:`torch.Tensor`): The sequence of states.
+            - actions (:obj:`torch.Tensor`): The sequence of actions.
+            - returns_to_go (:obj:`torch.Tensor`): The sequence of return-to-go.
+            - tar (:obj:`Optional[int]`): Whether to predict action, regardless of index.
+        Returns:
+            - output (:obj:`Tuple[torch.Tensor, torch.Tensor, torch.Tensor]`): Output contains three tensors, \
+                which are correspondingly the predicted states, predicted actions and predicted return-to-go.
+
+        Examples:
+            >>> B, T = 4, 6
+            >>> state_dim = 3
+            >>> act_dim = 2
+            >>> DT_model = DecisionTransformer(\
+                state_dim=state_dim,\
+                act_dim=act_dim,\
+                n_blocks=3,\
+                h_dim=8,\
+                context_len=T,\
+                n_heads=2,\
+                drop_p=0.1,\
+            )
+
+            >>> timesteps = torch.randint(0, 100, [B, 3 * T - 1, 1], dtype=torch.long)  # B x T
+            >>> states = torch.randn([B, T, state_dim])  # B x T x state_dim
+
+            >>> actions = torch.randint(0, act_dim, [B, T, 1])
+            >>> action_target = torch.randint(0, act_dim, [B, T, 1])
+            >>> returns_to_go = torch.tensor([1, 0.8, 0.6, 0.4, 0.2, 0.]).repeat([B, 1]).unsqueeze(-1).float()
+
+            >>> traj_mask = torch.ones([B, T], dtype=torch.long)  # B x T
+            >>> actions = actions.squeeze(-1)
+
+            >>> state_preds, action_preds, return_preds = DT_model.forward(\
+                timesteps=timesteps, states=states, actions=actions, returns_to_go=returns_to_go\
+            )
+
+            >>> assert state_preds.shape == torch.Size([B, T, state_dim])
+            >>> assert return_preds.shape == torch.Size([B, T, 1])
+            >>> assert action_preds.shape == torch.Size([B, T, act_dim])
+        """
         B, T = states.shape[0], states.shape[1]
 
         if self.state_encoder is None:
             time_embeddings = self.embed_timestep(timesteps)
@@ -217,12 +357,20 @@ def forward(self, timesteps, states, actions, returns_to_go, tar=None):
 
         return state_preds, action_preds, return_preds
 
-    def configure_optimizers(self, weight_decay, learning_rate, betas=(0.9, 0.95)):
+    def configure_optimizers(
+            self, weight_decay: float, learning_rate: float, betas: Tuple[float, float] = (0.9, 0.95)
+    ) -> torch.optim.optimizer.Optimizer:
         """
-        This long function is unfortunately doing something very simple and is being very defensive:
-        We are separating out all parameters of the model into two buckets: those that will experience
-        weight decay for regularization and those that won't (biases, and layernorm/embedding weights).
-        We are then returning the PyTorch optimizer object.
+        Overview:
+            This function returns an optimizer given the input arguments. \
+            We are separating out all parameters of the model into two buckets: those that will experience \
+            weight decay for regularization and those that won't (biases, and layernorm/embedding weights).
+        Arguments:
+            - weight_decay (:obj:`float`): The weight decay of the optimizer.
+            - learning_rate (:obj:`float`): The learning rate of the optimizer.
+            - betas (:obj:`Tuple[float, float]`): The betas of the Adam optimizer.
+        Returns:
+            - optimizer (:obj:`torch.optim.optimizer.Optimizer`): The desired optimizer.
""" # separate out all parameters to those that will and won't experience regularizing weight decay diff --git a/ding/model/template/language_transformer.py b/ding/model/template/language_transformer.py index 521d365376..cac2d69adf 100644 --- a/ding/model/template/language_transformer.py +++ b/ding/model/template/language_transformer.py @@ -1,16 +1,23 @@ +from typing import List, Dict import torch - -from ding.utils import MODEL_REGISTRY from torch import nn + try: from transformers import AutoTokenizer, AutoModelForTokenClassification except ImportError: from ditk import logging logging.warning("not found transformer, please install it using: pip install transformers") +from ding.utils import MODEL_REGISTRY @MODEL_REGISTRY.register('language_transformer') class LanguageTransformer(nn.Module): + """ + Overview: + The LanguageTransformer network. Download a pre-trained language model and add head on it. + Interfaces: + ``__init__``, ``forward`` + """ def __init__( self, @@ -19,6 +26,17 @@ def __init__( embedding_size: int = 128, freeze_encoder: bool = True ) -> None: + """ + Overview: + Init the LanguageTransformer Model according to input arguments. + Arguments: + - model_name (:obj:`str`): The base language model name in huggingface, such as "bert-base-uncased". + - add_linear (:obj:`bool`): Whether to add a linear layer on the top of language model, defaults to be \ + ``False``. + - embedding_size (:obj:`int`): The embedding size of the added linear layer, such as 128. + - freeze_encoder (:obj:`bool`): Whether to freeze the encoder language model while training, \ + defaults to be ``True``. + """ super().__init__() self.tokenizer = AutoTokenizer.from_pretrained(model_name) self.model = AutoModelForTokenClassification.from_pretrained(model_name) @@ -29,7 +47,7 @@ def __init__( param.requires_grad = False if add_linear: - # Add an additional small, adjustable linear layer on top of BERT tuned through RL + # Add a small, adjustable linear layer on top of language model tuned through RL self.embedding_size = embedding_size self.linear = nn.Linear( self.model.config.hidden_size, embedding_size @@ -54,7 +72,30 @@ def _calc_embedding(self, x: list) -> torch.Tensor: return sentence_embedding - def forward(self, train_samples: list, candidate_samples: list) -> dict: + def forward(self, train_samples: List[str], candidate_samples: List[str]) -> Dict: + """ + Overview: + LanguageTransformer forward computation graph, input two lists of strings and predict their matching scores. + Arguments: + - train_samples (:obj:`List[str]`): One list of strings. + - candidate_samples (:obj:`List[str]`): The other list of strings to calculate the matching scores. + Returns: + - output (:obj:`Dict`): Output dict data, including the logit of matching scores and the \ + corresponding ``torch.distributions.Categorical`` object. 
+
+        Examples:
+            >>> test_pids = [1]
+            >>> cand_pids = [0, 2, 4]
+            >>> problems = [ \
+                "This is problem 0", "This is the first question", "Second problem is here", "Another problem", \
+                "This is the last problem" \
+            ]
+            >>> ctxt_list = [problems[pid] for pid in test_pids]
+            >>> cands_list = [problems[pid] for pid in cand_pids]
+            >>> model = LanguageTransformer(model_name="bert-base-uncased", add_linear=True, embedding_size=256)
+            >>> scores = model(ctxt_list, cands_list)
+            >>> assert scores.shape == (1, 3)
+        """
         prompt_embedding = self._calc_embedding(train_samples)
         cands_embedding = self._calc_embedding(candidate_samples)
         scores = torch.mm(prompt_embedding, cands_embedding.t())
diff --git a/ding/model/template/procedure_cloning.py b/ding/model/template/procedure_cloning.py
index d0d6ffcbd0..4f03c8a4bf 100644
--- a/ding/model/template/procedure_cloning.py
+++ b/ding/model/template/procedure_cloning.py
@@ -9,12 +9,31 @@
 from ..common import FCEncoder, ConvEncoder
 
 
-class Block(nn.Module):
+class PCTransformer(nn.Module):
+    """
+    Overview:
+        The transformer block used in the neural network of algorithms related to Procedure cloning (PC).
+    Interfaces:
+        ``__init__``, ``forward``.
+    """
 
     def __init__(
         self, cnn_hidden: int, att_hidden: int, att_heads: int, drop_p: float, max_T: int, n_att: int,
         feedforward_hidden: int, n_feedforward: int
    ) -> None:
+        """
+        Overview:
+            Initialize the procedure cloning transformer model according to corresponding input arguments.
+        Arguments:
+            - cnn_hidden (:obj:`int`): The last channel dimension of CNN encoder, such as 32.
+            - att_hidden (:obj:`int`): The dimension of attention blocks, such as 32.
+            - att_heads (:obj:`int`): The number of heads in attention blocks, such as 4.
+            - drop_p (:obj:`float`): The dropout rate of attention, such as 0.5.
+            - max_T (:obj:`int`): The sequence length of procedure cloning, such as 4.
+            - n_att (:obj:`int`): The number of attention layers, such as 4.
+            - feedforward_hidden (:obj:`int`): The dimension of feedforward layers, such as 32.
+            - n_feedforward (:obj:`int`): The number of feedforward layers, such as 4.
+        """
         super().__init__()
         self.n_att = n_att
         self.n_feedforward = n_feedforward
@@ -34,7 +53,20 @@ def __init__(
         self.norm_layer.extend([nn.LayerNorm(feedforward_hidden)] * n_feedforward)
         self.mask = torch.tril(torch.ones((max_T, max_T), dtype=torch.bool)).view(1, 1, max_T, max_T)
 
-    def forward(self, x: torch.Tensor):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Overview:
+            The unique execution (forward) method of PCTransformer.
+        Arguments:
+            - x (:obj:`torch.Tensor`): Sequential data of several hidden states.
+        Returns:
+            - output (:obj:`torch.Tensor`): A tensor with the same shape as the input.
+        Examples:
+            >>> model = PCTransformer(128, 128, 8, 0, 16, 2, 128, 2)
+            >>> h = torch.randn((2, 16, 128))
+            >>> h = model(h)
+            >>> assert h.shape == torch.Size([2, 16, 128])
+        """
         for i in range(self.n_att):
             x = self.att_drop(self.attention_layer[i](x, self.mask))
             x = self.norm_layer[i](x)
@@ -46,37 +78,63 @@ def forward(self, x: torch.Tensor):
 
 @MODEL_REGISTRY.register('pc_mcts')
 class ProcedureCloningMCTS(nn.Module):
+    """
+    Overview:
+        The neural network of algorithms related to Procedure cloning (PC).
+    Interfaces:
+        ``__init__``, ``forward``.
+ """ def __init__( self, obs_shape: SequenceType, action_dim: int, cnn_hidden_list: SequenceType = [128, 128, 256, 256, 256], - cnn_activation: Optional[nn.Module] = nn.ReLU(), + cnn_activation: nn.Module = nn.ReLU(), cnn_kernel_size: SequenceType = [3, 3, 3, 3, 3], cnn_stride: SequenceType = [1, 1, 1, 1, 1], - cnn_padding: Optional[SequenceType] = [1, 1, 1, 1, 1], + cnn_padding: SequenceType = [1, 1, 1, 1, 1], mlp_hidden_list: SequenceType = [256, 256], - mlp_activation: Optional[nn.Module] = nn.ReLU(), + mlp_activation: nn.Module = nn.ReLU(), att_heads: int = 8, att_hidden: int = 128, n_att: int = 4, n_feedforward: int = 2, feedforward_hidden: int = 256, drop_p: float = 0.5, - augment: bool = True, max_T: int = 17 ) -> None: + """ + Overview: + Initialize the MCTS procedure cloning model according to corresponding input arguments. + Arguments: + - obs_shape (:obj:`SequenceType`): Observation space shape, such as [4, 84, 84]. + - action_dim (:obj:`int`): Action space shape, such as 6. + - cnn_hidden_list (:obj:`SequenceType`): The cnn channel dims for each block, such as\ + [128, 128, 256, 256, 256]. + - cnn_activation (:obj:`nn.Module`): The activation function for cnn blocks, such as ``nn.ReLU()``. + - cnn_kernel_size (:obj:`SequenceType`): The kernel size for each cnn block, such as [3, 3, 3, 3, 3]. + - cnn_stride (:obj:`SequenceType`): The stride for each cnn block, such as [1, 1, 1, 1, 1]. + - cnn_padding (:obj:`SequenceType`): The padding for each cnn block, such as [1, 1, 1, 1, 1]. + - mlp_hidden_list (:obj:`SequenceType`): The last dim for this must match the last dim of \ + ``cnn_hidden_list``, such as [256, 256]. + - mlp_activation (:obj:`nn.Module`): The activation function for mlp layers, such as ``nn.ReLU()``. + - att_heads (:obj:`int`): The number of attention heads in transformer, such as 8. + - att_hidden (:obj:`int`): The number of attention dimension in transformer, such as 128. + - n_att (:obj:`int`): The number of attention blocks in transformer, such as 4. + - n_feedforward (:obj:`int`): The number of feedforward layers in transformer, such as 2. + - drop_p (:obj:`float`): The drop out rate of attention, such as 0.5. + - max_T (:obj:`int`): The sequence length of procedure cloning, such as 17. + """ super().__init__() - #Conv Encoder + # Conv Encoder self.embed_state = ConvEncoder( obs_shape, cnn_hidden_list, cnn_activation, cnn_kernel_size, cnn_stride, cnn_padding ) self.embed_action = FCEncoder(action_dim, mlp_hidden_list, activation=mlp_activation) self.cnn_hidden_list = cnn_hidden_list - self.augment = augment assert cnn_hidden_list[-1] == mlp_hidden_list[-1] layers = [] @@ -93,7 +151,7 @@ def __init__( layers.append(fc_block(feedforward_hidden, feedforward_hidden, activation=nn.ReLU())) self.layernorm2 = build_normalization('LN')(feedforward_hidden) - self.transformer = Block( + self.transformer = PCTransformer( cnn_hidden_list[-1], att_hidden, att_heads, drop_p, max_T, n_att, feedforward_hidden, n_feedforward ) @@ -101,8 +159,28 @@ def __init__( self.predict_action = torch.nn.Linear(cnn_hidden_list[-1], action_dim) def forward(self, states: torch.Tensor, goals: torch.Tensor, - actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - + actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Overview: + ProcedureCloningMCTS forward computation graph, input states tensor and goals tensor, \ + calculate the predicted states and actions. + Arguments: + - states (:obj:`torch.Tensor`): The observation of current time. 
+            - goals (:obj:`torch.Tensor`): The target observation after a period.
+            - actions (:obj:`torch.Tensor`): The actions executed during the period.
+        Returns:
+            - outputs (:obj:`Tuple[torch.Tensor, torch.Tensor]`): Predicted goals and actions.
+        Examples:
+            >>> inputs = { \
+                'states': torch.randn(2, 3, 64, 64), \
+                'goals': torch.randn(2, 3, 64, 64), \
+                'actions': torch.randn(2, 15, 9) \
+            }
+            >>> model = ProcedureCloningMCTS(obs_shape=(3, 64, 64), action_dim=9)
+            >>> goal_preds, action_preds = model(inputs['states'], inputs['goals'], inputs['actions'])
+            >>> assert goal_preds.shape == (2, 256)
+            >>> assert action_preds.shape == (2, 16, 9)
+        """
         B, T, _ = actions.shape
 
         # shape: (B, h_dim)
@@ -123,7 +201,8 @@ def forward(self, states: torch.Tensor, goals: torch.Tensor,
 
 class BFSConvEncoder(nn.Module):
     """
-    Overview: The ``BFSConvolution Encoder`` used to encode raw 2-dim observations. And output a feature map with the
+    Overview:
+        The ``BFSConvolution Encoder`` used to encode raw 3-dim observations, and output a feature map with the
         same height and width as input.
     Interfaces: ``__init__``, ``forward``.
     """
@@ -174,21 +253,42 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             - x (:obj:`torch.Tensor`): Env raw observation.
         Returns:
             - outputs (:obj:`torch.Tensor`): Output embedding tensor.
-        Shapes:
-            - outputs: :math:`(B, N, H, W)`, where ``N = hidden_size_list[-1]``.
+        Examples:
+            >>> model = BFSConvEncoder([3, 16, 16], [32, 32, 4], kernel_size=[3, 3, 3], stride=[1, 1, 1]\
+                , padding=[1, 1, 1])
+            >>> inputs = torch.randn(3, 16, 16).unsqueeze(0)
+            >>> outputs = model(inputs)
+            >>> assert outputs.shape == torch.Size([1, 4, 16, 16])
         """
         return self.main(x)
 
 
 @MODEL_REGISTRY.register('pc_bfs')
 class ProcedureCloningBFS(nn.Module):
+    """
+    Overview:
+        The neural network introduced in procedure cloning (PC) to process 3-dim observations. \
+        Given an input, this model will perform several 3x3 convolutions and output a feature map with \
+        the same height and width as the input. The channel number of the output will be ``action_shape``.
+    Interfaces:
+        ``__init__``, ``forward``.
+    """
 
     def __init__(
             self,
-            obs_shape: Union[int, SequenceType],
-            action_shape: Union[int, SequenceType],
+            obs_shape: SequenceType,
+            action_shape: int,
             encoder_hidden_size_list: SequenceType = [128, 128, 256, 256],
     ):
+        """
+        Overview:
+            Initialize the ``ProcedureCloningBFS`` model according to the provided arguments.
+        Arguments:
+            - obs_shape (:obj:`SequenceType`): Sequence of ``in_channel``, plus one or more ``input size``, \
+                such as [4, 84, 84].
+            - action_shape (:obj:`int`): Action space shape, such as 6.
+            - encoder_hidden_size_list (:obj:`SequenceType`): The cnn channel dims for each block, \
+                such as [128, 128, 256, 256].
+        """
         super().__init__()
 
         num_layers = len(encoder_hidden_size_list)
@@ -207,6 +307,21 @@ def __init__(
         )
 
     def forward(self, x: torch.Tensor) -> Dict:
+        """
+        Overview:
+            The computation graph. Given a 3-dim observation, this function will return a tensor with the same \
+            height and width. The channel number of the output will be ``action_shape``.
+        Arguments:
+            - x (:obj:`torch.Tensor`): The input observation tensor data.
+        Returns:
+            - outputs (:obj:`Dict`): The output dict of the model's forward computation graph, \
+                which only contains a single key ``logit``.
+        Examples:
+            >>> model = ProcedureCloningBFS([3, 16, 16], 4)
+            >>> inputs = torch.randn(16, 16, 3).unsqueeze(0)
+            >>> outputs = model(inputs)
+            >>> assert outputs['logit'].shape == torch.Size([1, 16, 16, 4])
+        """
         x = x.permute(0, 3, 1, 2)
         x = self._encoder(x)
         return {'logit': x.permute(0, 2, 3, 1)}
diff --git a/ding/model/template/tests/test_procedure_cloning.py b/ding/model/template/tests/test_procedure_cloning.py
index 5a52542879..b2bb197954 100644
--- a/ding/model/template/tests/test_procedure_cloning.py
+++ b/ding/model/template/tests/test_procedure_cloning.py
@@ -24,9 +24,6 @@ def test_procedure_cloning_mcts(self, obs_shape, action_dim):
             'actions': torch.randn(B, T, action_dim)
         }
         model = ProcedureCloningMCTS(obs_shape=obs_shape, action_dim=action_dim)
-
-        print(model)
-
         goal_preds, action_preds = model(inputs['states'], inputs['goals'], inputs['actions'])
         assert goal_preds.shape == (B, obs_embeddings)
         assert action_preds.shape == (B, T + 1, action_dim)
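
The docstring examples added to bc.py can be exercised directly as a quick smoke test. A minimal, illustrative sketch is below (assuming the import path ding.model.template.bc implied by the file paths in this patch; the snippet itself is not part of the patch):

    import torch

    from ding.model.template.bc import ContinuousBC

    # Regression head: the output dict carries the predicted action directly.
    model = ContinuousBC(32, 6, action_space='regression')
    inputs = torch.randn(4, 32)
    outputs = model(inputs)
    assert isinstance(outputs, dict) and outputs['action'].shape == torch.Size([4, 6])

    # Reparameterization head: 'logit' holds two tensors of the action distribution,
    # indexed the same way as in the new ContinuousBC.forward docstring example.
    model = ContinuousBC(32, 6, action_space='reparameterization')
    outputs = model(inputs)
    assert outputs['logit'][0].shape == torch.Size([4, 6])
    assert outputs['logit'][1].shape == torch.Size([4, 6])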