From 6d80d252108f2037861b0f9196a118e178a0fd66 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 15 Apr 2021 13:05:56 -0700 Subject: [PATCH 1/2] Fix the attention module embedding size --- .../trainers/tests/torch/test_utils.py | 2 +- ml-agents/mlagents/trainers/torch/networks.py | 31 +++++++++++++------ ml-agents/mlagents/trainers/torch/utils.py | 14 ++++++--- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/torch/test_utils.py b/ml-agents/mlagents/trainers/tests/torch/test_utils.py index 494119e8a6..b18ce3a894 100644 --- a/ml-agents/mlagents/trainers/tests/torch/test_utils.py +++ b/ml-agents/mlagents/trainers/tests/torch/test_utils.py @@ -51,7 +51,7 @@ def test_create_inputs(encoder_type, normalize, num_vector, num_visual): h_size = 128 obs_spec = create_observation_specs_with_shapes(obs_shapes) encoders, embedding_sizes = ModelUtils.create_input_processors( - obs_spec, h_size, encoder_type, normalize + obs_spec, h_size, encoder_type, h_size, normalize ) total_output = sum(embedding_sizes) vec_enc = [] diff --git a/ml-agents/mlagents/trainers/torch/networks.py b/ml-agents/mlagents/trainers/torch/networks.py index 0023653170..dce8b03305 100644 --- a/ml-agents/mlagents/trainers/torch/networks.py +++ b/ml-agents/mlagents/trainers/torch/networks.py @@ -32,6 +32,8 @@ class ObservationEncoder(nn.Module): + ATTENTION_EMBEDDING_SIZE = 128 # The embedding size of attention is fixed + def __init__( self, observation_specs: List[ObservationSpec], @@ -45,13 +47,17 @@ def __init__( """ super().__init__() self.processors, self.embedding_sizes = ModelUtils.create_input_processors( - observation_specs, h_size, vis_encode_type, normalize=normalize + observation_specs, + h_size, + vis_encode_type, + self.ATTENTION_EMBEDDING_SIZE, + normalize=normalize, ) self.rsa, self.x_self_encoder = ModelUtils.create_residual_self_attention( - self.processors, self.embedding_sizes, h_size + self.processors, self.embedding_sizes, self.ATTENTION_EMBEDDING_SIZE ) if self.rsa is not None: - total_enc_size = sum(self.embedding_sizes) + h_size + total_enc_size = sum(self.embedding_sizes) + self.ATTENTION_EMBEDDING_SIZE else: total_enc_size = sum(self.embedding_sizes) self.normalize = normalize @@ -247,6 +253,8 @@ def forward( class MultiAgentNetworkBody(torch.nn.Module): + ATTENTION_EMBEDDING_SIZE = 128 + """ A network body that uses a self attention layer to handle state and action input from a potentially variable number of agents that @@ -284,13 +292,18 @@ def __init__( + sum(self.action_spec.discrete_branches) + self.action_spec.continuous_size ) - self.obs_encoder = EntityEmbedding(obs_only_ent_size, None, self.h_size) - self.obs_action_encoder = EntityEmbedding(q_ent_size, None, self.h_size) - self.self_attn = ResidualSelfAttention(self.h_size) + self.obs_encoder = EntityEmbedding( + obs_only_ent_size, None, self.ATTENTION_EMBEDDING_SIZE + ) + self.obs_action_encoder = EntityEmbedding( + q_ent_size, None, self.ATTENTION_EMBEDDING_SIZE + ) + + self.self_attn = ResidualSelfAttention(self.ATTENTION_EMBEDDING_SIZE) self.linear_encoder = LinearEncoder( - self.h_size, + self.ATTENTION_EMBEDDING_SIZE, network_settings.num_layers, self.h_size, kernel_gain=(0.125 / self.h_size) ** 0.5, @@ -337,9 +350,7 @@ def _copy_and_remove_nans_from_obs( no_nan_obs = [] for obs in single_agent_obs: new_obs = obs.clone() - new_obs[ - attention_mask.bool()[:, i_agent], :: - ] = 0.0 # Remoove NaNs fast + new_obs[attention_mask.bool()[:, i_agent], ::] = 0.0 # Remove NaNs fast 
no_nan_obs.append(new_obs) obs_with_no_nans.append(no_nan_obs) return obs_with_no_nans diff --git a/ml-agents/mlagents/trainers/torch/utils.py b/ml-agents/mlagents/trainers/torch/utils.py index e2a5b6e750..3d7ecd2836 100644 --- a/ml-agents/mlagents/trainers/torch/utils.py +++ b/ml-agents/mlagents/trainers/torch/utils.py @@ -142,13 +142,15 @@ def get_encoder_for_obs( obs_spec: ObservationSpec, normalize: bool, h_size: int, + attention_embedding_size: int, vis_encode_type: EncoderType, ) -> Tuple[nn.Module, int]: """ Returns the encoder and the size of the appropriate encoder. :param shape: Tuples that represent the observation dimension. :param normalize: Normalize all vector inputs. - :param h_size: Number of hidden units per layer. + :param h_size: Number of hidden units per layer excluding attention layers. + :param attention_embedding_size: Number of hidden units per attention layer. :param vis_encode_type: Type of visual encoder to use. """ shape = obs_spec.shape @@ -167,7 +169,7 @@ def get_encoder_for_obs( EntityEmbedding( entity_size=shape[1], entity_num_max_elements=shape[0], - embedding_size=h_size, + embedding_size=attention_embedding_size, ), 0, ) @@ -179,6 +181,7 @@ def create_input_processors( observation_specs: List[ObservationSpec], h_size: int, vis_encode_type: EncoderType, + attention_embedding_size: int, normalize: bool = False, ) -> Tuple[nn.ModuleList, List[int]]: """ @@ -186,7 +189,8 @@ def create_input_processors( :param observation_specs: List of ObservationSpec that represent the observation dimensions. :param action_size: Number of additional un-normalized inputs to each vector encoder. Used for conditioning network on other values (e.g. actions for a Q function) - :param h_size: Number of hidden units per layer. + :param h_size: Number of hidden units per layer excluding attention layers. + :param attention_embedding_size: Number of hidden units per attention layer. :param vis_encode_type: Type of visual encoder to use. :param unnormalized_inputs: Vector inputs that should not be normalized, and added to the vector obs. @@ -200,7 +204,7 @@ def create_input_processors( embedding_sizes: List[int] = [] for obs_spec in observation_specs: encoder, embedding_size = ModelUtils.get_encoder_for_obs( - obs_spec, normalize, h_size, vis_encode_type + obs_spec, normalize, h_size, attention_embedding_size, vis_encode_type ) encoders.append(encoder) embedding_sizes.append(embedding_size) @@ -209,7 +213,7 @@ def create_input_processors( if x_self_size > 0: for enc in encoders: if isinstance(enc, EntityEmbedding): - enc.add_self_embedding(h_size) + enc.add_self_embedding(attention_embedding_size) return (nn.ModuleList(encoders), embedding_sizes) @staticmethod From 2789f0e4bf83badc1d649490f7bccb859823eeac Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 15 Apr 2021 15:20:05 -0700 Subject: [PATCH 2/2] editing the changelog --- com.unity.ml-agents/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index f2f8956dfb..5e4be3a07f 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -59,6 +59,7 @@ This results in much less memory being allocated during inference with `CameraSe #### ml-agents / ml-agents-envs / gym-unity (Python) - Some console output have been moved from `info` to `debug` and will not be printed by default. 
If you want all messages to be printed, you can run `mlagents-learn` with the `--debug` option or add the line `debug: true` at the top of the yaml config file. (#5211)
+- The embedding size of the attention layers used when a BufferSensor is in the scene has been changed: it is now fixed to 128 units. As a result, it may not be possible to resume training from a checkpoint created with a previous version. (#5272)
 ### Bug Fixes
 #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
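
The substance of the change, stripped of the ml-agents plumbing: the attention path (EntityEmbedding followed by ResidualSelfAttention) no longer inherits its width from the trainer's hidden_units (h_size) but always uses a 128-unit embedding, and only the layers after the attention block still scale with h_size. Below is a minimal standalone PyTorch sketch of that sizing pattern; it is a toy module, not the ml-agents implementation, and the module name, head count, and mean pooling are illustrative assumptions.

import torch
from torch import nn

# Fixed attention width, mirroring ATTENTION_EMBEDDING_SIZE = 128 in the patch.
ATTENTION_EMBEDDING_SIZE = 128


class TinyEntityAttention(nn.Module):
    """Toy stand-in for the EntityEmbedding + ResidualSelfAttention sizing pattern."""

    def __init__(self, entity_size: int, h_size: int):
        super().__init__()
        # Variable-length entity observations are projected to the fixed
        # attention width, no longer to the trainer's h_size.
        self.entity_embedding = nn.Linear(entity_size, ATTENTION_EMBEDDING_SIZE)
        self.attention = nn.MultiheadAttention(
            ATTENTION_EMBEDDING_SIZE, num_heads=4, batch_first=True
        )
        # Only after attention is the pooled result mixed back to h_size.
        self.encoder = nn.Linear(ATTENTION_EMBEDDING_SIZE, h_size)

    def forward(self, entities: torch.Tensor) -> torch.Tensor:
        embedded = self.entity_embedding(entities)                  # (B, N, 128)
        attended, _ = self.attention(embedded, embedded, embedded)  # (B, N, 128)
        pooled = attended.mean(dim=1)                               # (B, 128)
        return torch.relu(self.encoder(pooled))                     # (B, h_size)


# Changing hidden_units (h_size) now only resizes the layers after attention;
# the attention block itself keeps its 128-unit parameters, which is why
# checkpoints whose attention layers were sized by h_size may no longer load.
net = TinyEntityAttention(entity_size=20, h_size=256)
out = net(torch.randn(8, 5, 20))  # batch of 8, 5 entities, 20 features each
print(out.shape)  # torch.Size([8, 256])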