Unity-Technologies · vincentpierre · Feb 19, 2020 · Feb 13, 2020 · Feb 14, 2020 · Feb 14, 2020
diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs
@@ -113,6 +113,10 @@ public bool IsCommunicatorOn
         // Signals to all the listeners that the academy is being destroyed
         internal event Action DestroyAction;
 
+        // Signals the Agents that a new step is about to start so they can increment
+        // their step count; reaching maxStep is checked afterwards, in Agent.AgentStep.
+        internal event Action AgentIncrementStep;
+
         // Signals to all the agents at each environment step along with the
         // Academy's maxStepReached, done and stepCount values. The agents rely
         // on this event to update their own values of max step reached and done
@@ -417,6 +421,9 @@ public void EnvironmentStep()
 
             AgentSetStatus?.Invoke(m_StepCount);
 
+            m_StepCount += 1;
+            m_TotalStepCount += 1;
+            AgentIncrementStep?.Invoke();
 
             using (TimerStack.Instance.Scoped("AgentSendState"))
             {
@@ -432,9 +439,6 @@ public void EnvironmentStep()
             {
                 AgentAct?.Invoke();
             }
-
-            m_StepCount += 1;
-            m_TotalStepCount += 1;
         }
 
         /// <summary>

diff --git a/com.unity.ml-agents/Runtime/Agent.cs b/com.unity.ml-agents/Runtime/Agent.cs
@@ -240,6 +240,7 @@ public void LazyInitialize()
             m_Action = new AgentAction();
             sensors = new List<ISensor>();
 
+            Academy.Instance.AgentIncrementStep += AgentIncrementStep;
             Academy.Instance.AgentSendState += SendInfo;
             Academy.Instance.DecideAction += DecideAction;
             Academy.Instance.AgentAct += AgentStep;
@@ -258,6 +259,7 @@ void OnDisable()
             // We don't want to even try, because this will lazily create a new Academy!
             if (Academy.IsInitialized)
             {
+                Academy.Instance.AgentIncrementStep -=AgentIncrementStep;
                 Academy.Instance.AgentSendState -= SendInfo;
                 Academy.Instance.DecideAction -= DecideAction;
                 Academy.Instance.AgentAct -= AgentStep;
@@ -688,19 +690,16 @@ void SendInfo()
             }
         }
 
+        /// Handler for Academy.AgentIncrementStep: advances this agent's step count by one.
+        void AgentIncrementStep()
+        {
+            // Counting only; the maxStep comparison lives at the end of AgentStep.
+            ++m_StepCount;
+        }
+
         /// Used by the brain to make the agent perform a step.
         void AgentStep()
         {
-            if ((m_StepCount >= maxStep) && (maxStep > 0))
-            {
-                NotifyAgentDone(true);
-                _AgentReset();
-            }
-            else
-            {
-                m_StepCount += 1;
-            }
-
             if ((m_RequestAction) && (m_Brain != null))
             {
                 m_RequestAction = false;
@@ -709,6 +708,12 @@ void AgentStep()
                     AgentAction(m_Action.vectorActions);
                 }
             }
+
+            if ((m_StepCount >= maxStep) && (maxStep > 0))
+            {
+                NotifyAgentDone(true);
+                _AgentReset();
+            }
         }
 
         void DecideAction()

diff --git a/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs b/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
@@ -489,7 +489,8 @@ public void TestCumulativeReward()
             var j = 0;
             for (var i = 0; i < 500; i++)
             {
-                if (i % 21 == 0)
+                Debug.Log(i);
+                if (i % 20 == 0)
                 {
                     j = 0;
                 }
@@ -524,12 +525,11 @@ public void TestMaxStepsReset()
 
             for (var i = 0; i < 15; i++)
             {
-                // We expect resets to occur when there are maxSteps actions since the last reset (and on the first step)
-                var expectReset = agent1.agentActionCallsSinceLastReset == maxStep || (i == 0);
+                Debug.Log(i);
+                // We expect a reset once there have been maxStep actions since the last reset.
+                var expectReset = agent1.agentActionCallsSinceLastReset == maxStep;
                 var previousNumResets = agent1.agentResetCalls;
 
-                aca.EnvironmentStep();
-
                 if (expectReset)
                 {
                     Assert.AreEqual(previousNumResets + 1, agent1.agentResetCalls);
@@ -538,6 +538,8 @@ public void TestMaxStepsReset()
                 {
                     Assert.AreEqual(previousNumResets, agent1.agentResetCalls);
                 }
+
+                aca.EnvironmentStep();
             }
         }
     }