Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the DecisionRequester public and customizable. #3716

Merged
merged 4 commits into from
Apr 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .yamato/gym-interface-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ test_gym_interface_{{ editor.version }}:
commands:
- pip install pyyaml
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=Project/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
Expand Down
4 changes: 2 additions & 2 deletions .yamato/protobuf-generation-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ test_mac_protobuf_generation:
- "protobuf-definitions/*.md"
- "protobuf-definitions/**/*.md"
artifacts:
dist:
patch:
paths:
- "artifacts/*"
- "artifacts/*.*"
2 changes: 1 addition & 1 deletion .yamato/python-ll-api-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ test_mac_ll_api_{{ editor.version }}:
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }} --env=Project/testPlayer
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
changes:
Expand Down
5 changes: 4 additions & 1 deletion .yamato/standalone-build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ test_mac_standalone_{{ editor.version }}:
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
artifacts:
logs:
paths:
- "artifacts/standalone_build.txt"
standalonebuild:
paths:
- "Project/testPlayer*/**"
- "artifacts/testPlayer*/**"
{% endfor %}
11 changes: 7 additions & 4 deletions .yamato/training-int-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ test_mac_training_int_{{ editor.version }}:
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
# - python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
# - python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
Expand All @@ -35,7 +35,10 @@ test_mac_training_int_{{ editor.version }}:
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
artifacts:
unit:
logs:
paths:
- "artifacts/**"
- "artifacts/standalone_build.txt"
standalonebuild:
paths:
- "artifacts/testplayer*/**"
{% endfor %}
2 changes: 2 additions & 0 deletions com.unity.ml-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Removed the multi-agent gym option from the gym wrapper. For multi-agent scenarios, use the [Low Level Python API](Python-API.md).
- The low level Python API has changed. You can look at the document [Low Level Python API documentation](Python-API.md) for more information. If you use `mlagents-learn` for training, this should be a transparent change.
- Added ability to start training (initialize model weights) from a previous run ID. (#3710)
- The internal event `Academy.AgentSetStatus` was renamed to `Academy.AgentPreStep` and made public.
- The offset logic was removed from DecisionRequester.

### Minor Changes
- Format of console output has changed slightly and now matches the name of the model/summary directory. (#3630, #3616)
Expand Down
19 changes: 11 additions & 8 deletions com.unity.ml-agents/Runtime/Academy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public class Academy : IDisposable
/// on each side, although we may allow some flexibility in the future.
/// This should be incremented whenever a change is made to the communication protocol.
/// </summary>
const string k_ApiVersion = "0.15.0";
const string k_ApiVersion = "0.16.0";

/// <summary>
/// Unity package version of com.unity.ml-agents.
Expand Down Expand Up @@ -138,11 +138,14 @@ public bool IsCommunicatorOn
// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;

// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
internal event Action<int> AgentSetStatus;

/// <summary>
/// Signals to all of the <see cref="Agent"/>s that their step is about to begin.
/// This is a good time for an <see cref="Agent"/> to decide if it would like to
/// call <see cref="Agent.RequestDecision"/> or <see cref="Agent.RequestAction"/>
/// for this step. Any other pre-step setup could be done during this event as well.
/// </summary>
public event Action<int> AgentPreStep;

// Signals to all the agents at each environment step so they can send
// their state to their Policy if they have requested a decision.
Expand Down Expand Up @@ -347,7 +350,7 @@ void ResetActions()
{
DecideAction = () => {};
DestroyAction = () => {};
AgentSetStatus = i => {};
AgentPreStep = i => {};
AgentSendState = () => {};
AgentAct = () => {};
AgentForceReset = () => {};
Expand Down Expand Up @@ -423,7 +426,7 @@ public void EnvironmentStep()
ForcedFullReset();
}

AgentSetStatus?.Invoke(m_StepCount);
AgentPreStep?.Invoke(m_StepCount);

m_StepCount += 1;
m_TotalStepCount += 1;
Expand Down
34 changes: 15 additions & 19 deletions com.unity.ml-agents/Runtime/DecisionRequester.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System;
using UnityEngine;
using UnityEngine.Serialization;

Expand All @@ -8,12 +9,12 @@ namespace MLAgents
/// at regular intervals.
/// </summary>
[AddComponentMenu("ML Agents/Decision Requester", (int)MenuGroup.Default)]
internal class DecisionRequester : MonoBehaviour
[RequireComponent(typeof(Agent))]
public class DecisionRequester : MonoBehaviour
{
/// <summary>
/// The frequency with which the agent requests a decision. A DecisionPeriod of 5 means
/// that the Agent will request a decision every 5 Academy steps.
/// </summary>
/// that the Agent will request a decision every 5 Academy steps.
/// </summary>
[Range(1, 20)]
[Tooltip("The frequency with which the agent requests a decision. A DecisionPeriod " +
"of 5 means that the Agent will request a decision every 5 Academy steps.")]
Expand All @@ -29,37 +30,32 @@ internal class DecisionRequester : MonoBehaviour
[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;

/// <summary>
/// Whether or not the Agent decisions should start at an offset (different for each agent).
/// This does not affect <see cref="DecisionPeriod"/>. Turning this on will distribute
/// the decision-making computations for all the agents across multiple Academy steps.
/// This can be valuable in scenarios where you have many agents in the scene, particularly
/// during the inference phase.
/// </summary>
[Tooltip("Whether or not Agent decisions should start at an offset.")]
public bool offsetStep;

[NonSerialized]
Agent m_Agent;
int m_Offset;

internal void Awake()
{
m_Offset = offsetStep ? gameObject.GetInstanceID() : 0;
m_Agent = gameObject.GetComponent<Agent>();
Academy.Instance.AgentSetStatus += MakeRequests;
Debug.Assert(m_Agent != null, "Agent component was not found on this gameObject and is required.");
surfnerd marked this conversation as resolved.
Show resolved Hide resolved
Academy.Instance.AgentPreStep += MakeRequests;
}

void OnDestroy()
{
if (Academy.IsInitialized)
{
Academy.Instance.AgentSetStatus -= MakeRequests;
Academy.Instance.AgentPreStep -= MakeRequests;
}
}

void MakeRequests(int count)
/// <summary>
/// Method that hooks into the Academy in order to inform the Agent on whether or not it should request a
/// decision, and whether or not it should take actions between decisions.
/// </summary>
/// <param name="academyStepCount">The current step count of the academy.</param>
void MakeRequests(int academyStepCount)
{
if ((count + m_Offset) % DecisionPeriod == 0)
if (academyStepCount % DecisionPeriod == 0)
{
m_Agent?.RequestDecision();
}
Expand Down
102 changes: 1 addition & 101 deletions com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using MLAgents.Sensors;
using NUnit.Framework;
using UnityEngine;
using UnityEngine.TestTools;

namespace MLAgentsExamples
{
Expand Down Expand Up @@ -71,106 +72,5 @@ public void CheckSetupRayPerceptionSensorComponent()

sensorComponent.CreateSensor();
}

// Minimal Agent subclass that counts how many times Heuristic() is invoked,
// then defers to the base implementation for the returned actions.
class PublicApiAgent : Agent
{
    public int numHeuristicCalls;

    public override float[] Heuristic()
    {
        // Record the call before delegating to the base class.
        numHeuristicCalls += 1;
        var baseActions = base.Heuristic();
        return baseActions;
    }
}

// SensorComponent that wraps another component and exposes its sensor through a StackingSensor.
class StackingComponent : SensorComponent
{
    public SensorComponent wrappedComponent;
    public int numStacks;

    // Builds the wrapped component's sensor and wraps it in a StackingSensor with numStacks stacks.
    public override ISensor CreateSensor()
    {
        return new StackingSensor(wrappedComponent.CreateSensor(), numStacks);
    }

    // Returns a new array holding the wrapped component's shape with every entry multiplied by numStacks.
    public override int[] GetObservationShape()
    {
        var wrappedShape = wrappedComponent.GetObservationShape();
        var stackedShape = new int[wrappedShape.Length];
        for (var dim = 0; dim < wrappedShape.Length; dim++)
        {
            stackedShape[dim] = wrappedShape[dim] * numStacks;
        }

        return stackedShape;
    }
}


/// <summary>
/// Checks that an Agent, its BehaviorParameters and its sensor components can be
/// created, configured and stepped once from test code, and that the default
/// Heuristic implementation produces zero-valued actions.
/// </summary>
[Test]
public void CheckSetupAgent()
{
var gameObject = new GameObject();

// Configure the behavior through the BehaviorParameters component before the Agent is added.
var behaviorParams = gameObject.AddComponent<BehaviorParameters>();
behaviorParams.brainParameters.vectorObservationSize = 3;
behaviorParams.brainParameters.numStackedVectorObservations = 2;
behaviorParams.brainParameters.vectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
behaviorParams.brainParameters.vectorActionSize = new[] { 2, 2 };
behaviorParams.brainParameters.vectorActionSpaceType = SpaceType.Discrete;
behaviorParams.behaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.useChildSensors = true;

var agent = gameObject.AddComponent<PublicApiAgent>();
// Make sure we can set the behavior type correctly after the agent is added
behaviorParams.behaviorType = BehaviorType.InferenceOnly;
// Can't actually create an Agent with InferenceOnly and no model, so change back
behaviorParams.behaviorType = BehaviorType.Default;

// TODO - DecisionRequester is not public yet, so it cannot be exercised here
// var decisionRequester = gameObject.AddComponent<DecisionRequester>();
// decisionRequester.DecisionPeriod = 2;

var sensorComponent = gameObject.AddComponent<RayPerceptionSensorComponent3D>();
sensorComponent.sensorName = "ray3d";
sensorComponent.detectableTags = new List<string> { "Player", "Respawn" };
sensorComponent.raysPerDirection = 3;

// Make a StackingSensor that wraps the RayPerceptionSensorComponent3D
// This isn't necessarily practical, just to ensure that it can be done
var wrappingSensorComponent = gameObject.AddComponent<StackingComponent>();
wrappingSensorComponent.wrappedComponent = sensorComponent;
wrappingSensorComponent.numStacks = 3;

// ISensor isn't set up yet.
Assert.IsNull(sensorComponent.raySensor);

agent.LazyInitialize();
// Make sure we can set the behavior type correctly after the agent is initialized
// (this creates a new policy).
behaviorParams.behaviorType = BehaviorType.HeuristicOnly;

// Initialization should set up the sensors
Assert.IsNotNull(sensorComponent.raySensor);

// Let's change the inference device
var otherDevice = behaviorParams.inferenceDevice == InferenceDevice.CPU ? InferenceDevice.GPU : InferenceDevice.CPU;
agent.SetModel(behaviorParams.behaviorName, behaviorParams.model, otherDevice);

agent.AddReward(1.0f);

// Request both an action and a decision before stepping the Academy.
agent.RequestAction();
agent.RequestDecision();

// Turn off automatic stepping and advance the Academy by one step manually.
Academy.Instance.AutomaticSteppingEnabled = false;
Academy.Instance.EnvironmentStep();

var actions = agent.GetAction();
// default Heuristic implementation should return zero actions.
Assert.AreEqual(new[] {0.0f, 0.0f}, actions);
// The counting override in PublicApiAgent should have been hit exactly once.
Assert.AreEqual(1, agent.numHeuristicCalls);
}
}
}
Loading