Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop modify stepping logic #3448

Merged
merged 10 commits into from
Feb 19, 2020
10 changes: 7 additions & 3 deletions com.unity.ml-agents/Runtime/Academy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ public bool IsCommunicatorOn
// Signals to all the listeners that the academy is being destroyed
internal event Action DestroyAction;

// Signals the Agent that a new step is about to start, so that it can increment its step count.
// The Agent is marked as Done later in the same step, in its AgentAct handler, once it has reached its maxStep.
internal event Action AgentIncrementStep;

// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
Expand Down Expand Up @@ -417,6 +421,9 @@ public void EnvironmentStep()

AgentSetStatus?.Invoke(m_StepCount);

m_StepCount += 1;
m_TotalStepCount += 1;
AgentIncrementStep?.Invoke();

using (TimerStack.Instance.Scoped("AgentSendState"))
{
Expand All @@ -432,9 +439,6 @@ public void EnvironmentStep()
{
AgentAct?.Invoke();
}

m_StepCount += 1;
m_TotalStepCount += 1;
}

/// <summary>
Expand Down
25 changes: 15 additions & 10 deletions com.unity.ml-agents/Runtime/Agent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ public void LazyInitialize()
m_Action = new AgentAction();
sensors = new List<ISensor>();

Academy.Instance.AgentIncrementStep += AgentIncrementStep;
Academy.Instance.AgentSendState += SendInfo;
Academy.Instance.DecideAction += DecideAction;
Academy.Instance.AgentAct += AgentStep;
Expand All @@ -258,6 +259,7 @@ void OnDisable()
// We don't want to even try, because this will lazily create a new Academy!
if (Academy.IsInitialized)
{
Academy.Instance.AgentIncrementStep -=AgentIncrementStep;
vincentpierre marked this conversation as resolved.
Show resolved Hide resolved
Academy.Instance.AgentSendState -= SendInfo;
Academy.Instance.DecideAction -= DecideAction;
Academy.Instance.AgentAct -= AgentStep;
Expand Down Expand Up @@ -688,19 +690,16 @@ void SendInfo()
}
}

void AgentIncrementStep()
{

vincentpierre marked this conversation as resolved.
Show resolved Hide resolved
m_StepCount += 1;
vincentpierre marked this conversation as resolved.
Show resolved Hide resolved

vincentpierre marked this conversation as resolved.
Show resolved Hide resolved
}

/// Used by the brain to make the agent perform a step.
void AgentStep()
{
if ((m_StepCount >= maxStep) && (maxStep > 0))
{
NotifyAgentDone(true);
_AgentReset();
}
else
{
m_StepCount += 1;
}

if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
Expand All @@ -709,6 +708,12 @@ void AgentStep()
AgentAction(m_Action.vectorActions);
}
}

if ((m_StepCount >= maxStep) && (maxStep > 0))
{
NotifyAgentDone(true);
_AgentReset();
}
}

void DecideAction()
Expand Down
12 changes: 7 additions & 5 deletions com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,8 @@ public void TestCumulativeReward()
var j = 0;
for (var i = 0; i < 500; i++)
{
if (i % 21 == 0)
Debug.Log(i);
if (i % 20 == 0)
{
j = 0;
}
Expand Down Expand Up @@ -524,12 +525,11 @@ public void TestMaxStepsReset()

for (var i = 0; i < 15; i++)
{
// We expect resets to occur when there are maxSteps actions since the last reset (and on the first step)
var expectReset = agent1.agentActionCallsSinceLastReset == maxStep || (i == 0);
Debug.Log(i);
// We expect resets to occur when there are maxSteps actions since the last reset
var expectReset = agent1.agentActionCallsSinceLastReset == maxStep;
vincentpierre marked this conversation as resolved.
Show resolved Hide resolved
var previousNumResets = agent1.agentResetCalls;

aca.EnvironmentStep();

if (expectReset)
{
Assert.AreEqual(previousNumResets + 1, agent1.agentResetCalls);
Expand All @@ -538,6 +538,8 @@ public void TestMaxStepsReset()
{
Assert.AreEqual(previousNumResets, agent1.agentResetCalls);
}

aca.EnvironmentStep();
}
}
}
Expand Down