More misc hybrid action followup (#4777)

Unity-Technologies · Dec 18, 2020 · 27f7156 · 27f7156
1 parent 30ed097
commit 27f7156
Show file tree

Hide file tree

Showing 20 changed files with 105 additions and 70 deletions.
diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs
@@ -89,7 +89,7 @@ public class Academy : IDisposable
         ///     </item>
         ///     <item>
         ///         <term>1.3.0</term>
-        ///         <description>Support action spaces with both continuous and discrete actions.</description>
+        ///         <description>Support both continuous and discrete actions.</description>
         ///     </item>
         /// </list>
         /// </remarks>
@@ -590,7 +590,7 @@ void EnvironmentReset()
         /// NNModel and the InferenceDevice as provided.
         /// </summary>
         /// <param name="model">The NNModel the ModelRunner must use.</param>
-        /// <param name="actionSpec"> Description of the action spaces for the Agent.</param>
+        /// <param name="actionSpec"> Description of the actions for the Agent.</param>
         /// <param name="inferenceDevice">
         /// The inference device (CPU or GPU) the ModelRunner will use.
         /// </param>

diff --git a/com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs b/com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs
@@ -6,7 +6,7 @@
 namespace Unity.MLAgents.Actuators
 {
     /// <summary>
-    /// Defines the structure of an Action Space to be used by the Actuator system.
+    /// Defines the structure of the actions to be used by the Actuator system.
     /// </summary>
     [Serializable]
     public struct ActionSpec
@@ -15,9 +15,9 @@ public struct ActionSpec
         int m_NumContinuousActions;
 
         /// <summary>
-        /// An array of branch sizes for our action space.
+        /// An array of branch sizes for discrete actions.
         ///
-        /// For an IActuator that uses a Discrete <see cref="SpaceType"/>, the number of
+        /// For an IActuator that uses discrete actions, the number of
         /// branches is the Length of the Array and each index contains the branch size.
         /// The cumulative sum of the total number of discrete actions can be retrieved
         /// by the <see cref="SumOfDiscreteBranchSizes"/> property.
@@ -27,12 +27,12 @@ public struct ActionSpec
         public int[] BranchSizes;
 
         /// <summary>
-        /// The number of actions for a Continuous <see cref="SpaceType"/>.
+        /// The number of continuous actions that an Agent can take.
         /// </summary>
         public int NumContinuousActions { get { return m_NumContinuousActions; } set { m_NumContinuousActions = value; } }
 
         /// <summary>
-        /// The number of branches for a Discrete <see cref="SpaceType"/>.
+        /// The number of branches for discrete actions that an Agent can take.
         /// </summary>
         public int NumDiscreteActions { get { return BranchSizes == null ? 0 : BranchSizes.Length; } }
 
@@ -57,12 +57,11 @@ public static ActionSpec MakeContinuous(int numActions)
         /// Creates a Discrete <see cref="ActionSpec"/> with the array of branch sizes that
         /// represents the action space.
         /// </summary>
-        /// <param name="branchSizes">The array of branch sizes for the discrete action space.  Each index
+        /// <param name="branchSizes">The array of branch sizes for the discrete actions.  Each index
         /// contains the number of actions available for that branch.</param>
         /// <returns>A Discrete ActionSpec initialized with the array of branch sizes.</returns>
         public static ActionSpec MakeDiscrete(params int[] branchSizes)
         {
-            var numActions = branchSizes.Length;
             var actuatorSpace = new ActionSpec(0, branchSizes);
             return actuatorSpace;
         }

diff --git a/com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs b/com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs
@@ -15,7 +15,7 @@ public abstract class ActuatorComponent : MonoBehaviour
         public abstract IActuator CreateActuator();
 
         /// <summary>
-        /// The specification of the Action space for this ActuatorComponent.
+        /// The specification of the possible actions for this ActuatorComponent.
         /// This must produce the same results as the corresponding IActuator's ActionSpec.
         /// </summary>
         /// <seealso cref="ActionSpec"/>

diff --git a/com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs b/com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
@@ -73,7 +73,7 @@ void ReadyActuatorsForExecution()
         }
 
         /// <summary>
-        /// This method validates that all <see cref="IActuator"/>s have unique names and equivalent action space types
+        /// This method validates that all <see cref="IActuator"/>s have unique names
         /// if the `DEBUG` preprocessor macro is defined, and allocates the appropriate buffers to manage the actions for
         /// all of the <see cref="IActuator"/>s that may live on a particular object.
         /// </summary>
@@ -90,7 +90,6 @@ internal void ReadyActuatorsForExecution(IList<IActuator> actuators, int numCont
             }
 #if DEBUG
             // Make sure the names are actually unique
-            // Make sure all Actuators have the same SpaceType
             ValidateActuators();
 #endif
 
@@ -272,7 +271,7 @@ void SortActuators()
         }
 
         /// <summary>
-        /// Validates that the IActuators managed by this object have unique names and equivalent action space types.
+        /// Validates that the IActuators managed by this object have unique names.
         /// Each Actuator needs to have a unique name in order for this object to ensure that the storage of action
         /// buffers, and execution of Actuators remains deterministic across different sessions of running.
         /// </summary>

diff --git a/com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs b/com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs
@@ -121,6 +121,7 @@ public void Clear()
         /// <summary>
         /// Check if the <see cref="ActionBuffers"/> is empty.
         /// </summary>
+        /// <returns>Whether the buffers are empty.</returns>
         public bool IsEmpty()
         {
             return ContinuousActions.IsEmpty() && DiscreteActions.IsEmpty();

diff --git a/com.unity.ml-agents/Runtime/Actuators/IActuator.cs b/com.unity.ml-agents/Runtime/Actuators/IActuator.cs
@@ -6,7 +6,7 @@ namespace Unity.MLAgents.Actuators
     public interface IActuator : IActionReceiver
     {
         /// <summary>
-        /// The specification of the Action space for this IActuator.
+        /// The specification of the actions for this IActuator.
         /// </summary>
         /// <seealso cref="ActionSpec"/>
         ActionSpec ActionSpec { get; }

diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs
@@ -19,9 +19,9 @@ namespace Unity.MLAgents
     internal struct AgentInfo
     {
         /// <summary>
-        /// Keeps track of the last vector action taken by the Brain.
+        /// Keeps track of the last actions taken by the Brain.
         /// </summary>
-        public ActionBuffers storedVectorActions;
+        public ActionBuffers storedActions;
 
         /// <summary>
         /// For discrete control, specifies the actions that the agent cannot take.
@@ -52,17 +52,17 @@ internal struct AgentInfo
 
         public void ClearActions()
         {
-            storedVectorActions.Clear();
+            storedActions.Clear();
         }
 
         public void CopyActions(ActionBuffers actionBuffers)
         {
-            var continuousActions = storedVectorActions.ContinuousActions;
+            var continuousActions = storedActions.ContinuousActions;
             for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
             {
                 continuousActions[i] = actionBuffers.ContinuousActions[i];
             }
-            var discreteActions = storedVectorActions.DiscreteActions;
+            var discreteActions = storedActions.DiscreteActions;
             for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
             {
                 discreteActions[i] = actionBuffers.DiscreteActions[i];
@@ -438,7 +438,7 @@ public void LazyInitialize()
                 InitializeSensors();
             }
 
-            m_Info.storedVectorActions = new ActionBuffers(
+            m_Info.storedActions = new ActionBuffers(
                 new float[m_ActuatorManager.NumContinuousActions],
                 new int[m_ActuatorManager.NumDiscreteActions]
             );
@@ -557,7 +557,7 @@ void NotifyAgentDone(DoneReason doneReason)
             m_CumulativeReward = 0f;
             m_RequestAction = false;
             m_RequestDecision = false;
-            m_Info.storedVectorActions.Clear();
+            m_Info.storedActions.Clear();
         }
 
         /// <summary>
@@ -886,12 +886,22 @@ public virtual void Initialize() { }
         /// <seealso cref="IActionReceiver.OnActionReceived"/>
         public virtual void Heuristic(in ActionBuffers actionsOut)
         {
+            var brainParams = m_PolicyFactory.BrainParameters;
+            var actionSpec = brainParams.ActionSpec;
+            // For continuous and discrete actions together, we don't need to fall back to the legacy method
+            if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
+            {
+                Debug.LogWarning("Heuristic method called but not implemented. Clearing ActionBuffers.");
+                actionsOut.Clear();
+                return;
+            }
+
             // Disable deprecation warnings so we can call the legacy overload.
 #pragma warning disable CS0618
 
             // The default implementation of Heuristic calls the
             // obsolete version for backward compatibility
-            switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
+            switch (brainParams.VectorActionSpaceType)
             {
                 case SpaceType.Continuous:
                     Heuristic(actionsOut.ContinuousActions.Array);
@@ -1038,7 +1048,7 @@ void SendInfoToBrain()
                     CollectObservations(collectObservationsSensor);
                 }
             }
-            using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
+            using (TimerStack.Instance.Scoped("WriteActionMask"))
             {
                 m_ActuatorManager.WriteActionMask();
             }
@@ -1135,7 +1145,7 @@ public ReadOnlyCollection<float> GetObservations()
         }
 
         /// <summary>
-        /// Implement `CollectDiscreteActionMasks()` to collects the masks for discrete
+        /// Implement `WriteDiscreteActionMask()` to collect the masks for discrete
         /// actions. When using discrete actions, the agent will not perform the masked
         /// action.
         /// </summary>
@@ -1144,7 +1154,7 @@ public ReadOnlyCollection<float> GetObservations()
         /// </param>
         /// <remarks>
         /// When using Discrete Control, you can prevent the Agent from using a certain
-        /// action by masking it with <see cref="DiscreteActionMasker.SetMask(int, IEnumerable{int})"/>.
+        /// action by masking it with <see cref="IDiscreteActionMask.WriteMask(int, IEnumerable{int})"/>.
         ///
         /// See [Agents - Actions] for more information on masking actions.
         ///
@@ -1168,30 +1178,29 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
         /// on the provided action.
         /// </summary>
         /// <remarks>
-        /// An action is passed to this function in the form of an array vector. Your
-        /// implementation must use the array to direct the agent's behavior for the
+        /// An action is passed to this function in the form of an <seealso cref="ActionBuffers"/>.
+        /// Your implementation must use the ActionBuffers to direct the agent's behavior for the
         /// current step.
         ///
-        /// You decide how many elements you need in the action array to control your
+        /// You decide how many elements you need in the ActionBuffers to control your
         /// agent and what each element means. For example, if you want to apply a
         /// force to move an agent around the environment, you can arbitrarily pick
-        /// three values in the action array to use as the force components. During
-        /// training, the agent's  policy learns to set those particular elements of
+        /// three values in the ActionBuffers.ContinuousActions array to use as the force components.
+        /// During training, the agent's policy learns to set those particular elements of
         /// the array to maximize the training rewards the agent receives. (Of course,
         /// if you implement a <seealso cref="Heuristic(in ActionBuffers)"/> function, it must use the same
         /// elements of the action array for the same purpose since there is no learning
         /// involved.)
         ///
-        /// Actions for an agent can be either *Continuous* or *Discrete*. Specify which
-        /// type of action space an agent uses, along with the size of the action array,
-        /// in the <see cref="BrainParameters"/> of the agent's associated
+        /// An Agent can use continuous and/or discrete actions. Configure this along with the size
+        /// of the action array, in the <see cref="BrainParameters"/> of the agent's associated
         /// <see cref="BehaviorParameters"/> component.
         ///
-        /// When an agent uses the continuous action space, the values in the action
+        /// When an agent uses continuous actions, the values in the ActionBuffers.ContinuousActions
         /// array are floating point numbers. You should clamp the values to the range,
         /// -1..1, to increase numerical stability during training.
         ///
-        /// When an agent uses the discrete action space, the values in the action array
+        /// When an agent uses discrete actions, the values in the ActionBuffers.DiscreteActions array
         /// are integers that each represent a specific, discrete action. For example,
         /// you could define a set of discrete actions such as:
         ///
@@ -1204,24 +1213,23 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
         /// </code>
         ///
         /// When making a decision, the agent picks one of the five actions and puts the
-        /// corresponding integer value in the action vector. For example, if the agent
-        /// decided to move left, the action vector parameter would contain an array with
+        /// corresponding integer value in the ActionBuffers.DiscreteActions array. For example, if the agent
+        /// decided to move left, the ActionBuffers.DiscreteActions parameter would be an array with
         /// a single element with the value 1.
         ///
         /// You can define multiple sets, or branches, of discrete actions to allow an
         /// agent to perform simultaneous, independent actions. For example, you could
         /// use one branch for movement and another branch for throwing a ball left, right,
         /// up, or down, to allow the agent to do both in the same step.
         ///
-        /// The action vector of a discrete action space contains one element for each
-        /// branch. The value of each element is the integer representing the chosen
-        /// action for that branch. The agent always chooses one action for each
-        /// branch.
+        /// The ActionBuffers.DiscreteActions array of an agent with discrete actions contains one
+        /// element for each branch. The value of each element is the integer representing the
+        /// chosen action for that branch. The agent always chooses one action for each branch.
         ///
-        /// When you use the discrete action space, you can prevent the training process
+        /// When you use discrete actions, you can prevent the training process
         /// or the neural network model from choosing specific actions in a step by
-        /// implementing the <see cref="CollectDiscreteActionMasks(DiscreteActionMasker)"/>
-        /// function. For example, if your agent is next to a wall, you could mask out any
+        /// implementing the <see cref="WriteDiscreteActionMask(IDiscreteActionMask)"/>
+        /// method. For example, if your agent is next to a wall, you could mask out any
         /// actions that would result in the agent trying to move into the wall.
         ///
         /// For more information about implementing agent actions see [Agents - Actions].
@@ -1233,6 +1241,14 @@ public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
         /// </param>
         public virtual void OnActionReceived(ActionBuffers actions)
         {
+            var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
+            // For continuous and discrete actions together, we don't need to fall back to the legacy method
+            if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
+            {
+                // Nothing implemented.
+                return;
+            }
+
             if (!actions.ContinuousActions.IsEmpty())
             {
                 m_LegacyActionCache = actions.ContinuousActions.Array;

diff --git a/com.unity.ml-agents/Runtime/Agent.deprecated.cs b/com.unity.ml-agents/Runtime/Agent.deprecated.cs
@@ -42,7 +42,14 @@ public virtual void OnActionReceived(float[] vectorAction) { }
         [Obsolete("GetAction has been deprecated, please use GetStoredActionBuffers instead.")]
         public float[] GetAction()
         {
-            var storedAction = m_Info.storedVectorActions;
+            var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
+            // For continuous and discrete actions together, this shouldn't be called because we can only return one.
+            if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
+            {
+                Debug.LogWarning("Agent.GetAction() when both continuous and discrete actions are in use. Use Agent.GetStoredActionBuffers() instead.");
+            }
+
+            var storedAction = m_Info.storedActions;
             if (!storedAction.ContinuousActions.IsEmpty())
             {
                 return storedAction.ContinuousActions.Array;

diff --git a/com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs b/com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs
@@ -84,7 +84,7 @@ public static bool IsAnalyticsEnabled()
         /// <param name="behaviorName">The BehaviorName of the Agent using the model</param>
         /// <param name="inferenceDevice">Whether inference is being performed on the CPU or GPU</param>
         /// <param name="sensors">List of ISensors for the Agent. Used to generate information about the observation space.</param>
-        /// <param name="actionSpec">ActionSpec for the Agent. Used to generate information about the action space.</param>
+        /// <param name="actionSpec">ActionSpec for the Agent. Used to generate information about the actions.</param>
         /// <returns></returns>
         public static void InferenceModelSet(
             NNModel nnModel,