diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 4fe97bdea3..2ef7334f8b 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -7,12 +7,19 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [2.0.1] - 2021-10-13 ### Minor Changes -#### com.unity.ml-agents (C#) +#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#) - Upgrade to 2.0.1 +- Update gRPC native lib to universal for arm64 and x86_64. This change should enable ml-agents usage on mac M1 (#5283, #5519) #### ml-agents / ml-agents-envs / gym-unity (Python) -- Set gym version in gym-unity to gym release 0.20.0 +- Set gym version in gym-unity to gym release 0.20.0(#5540) +- Harden user PII protection logic and extend TrainingAnalytics to expose detailed configuration parameters. (#5512) - Added minimal analytics collection to LL-API (#5511) +### Bug Fixes +#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#) +- Fixed the bug where curriculum learning would crash because of the incorrect run_options parsing. (#5586) +#### ml-agents / ml-agents-envs / gym-unity (Python) + ## [2.0.0] - 2021-09-01 ### Minor Changes #### com.unity.ml-agents (C#) diff --git a/com.unity.ml-agents/Runtime/Analytics/AnalyticsUtils.cs b/com.unity.ml-agents/Runtime/Analytics/AnalyticsUtils.cs index fb480b7a11..b206f6bd98 100644 --- a/com.unity.ml-agents/Runtime/Analytics/AnalyticsUtils.cs +++ b/com.unity.ml-agents/Runtime/Analytics/AnalyticsUtils.cs @@ -1,19 +1,45 @@ using System; +using System.Text; +using System.Security.Cryptography; using UnityEngine; namespace Unity.MLAgents.Analytics { + internal static class AnalyticsUtils { + /// + /// Conversion function from byte array to hex string + /// + /// + /// A byte array to be hex encoded. 
+ private static string ToHexString(byte[] array) + { + StringBuilder hex = new StringBuilder(array.Length * 2); + foreach (byte b in array) + { + hex.AppendFormat("{0:x2}", b); + } + return hex.ToString(); + } + /// /// Hash a string to remove PII or secret info before sending to analytics /// - /// - /// A string containing the Hash128 of the input string. - public static string Hash(string s) + /// + /// A string containing the key to be used for HMAC encoding. + /// + /// A string containing the value to be encoded. + public static string Hash(string key, string value) { - var behaviorNameHash = Hash128.Compute(s); - return behaviorNameHash.ToString(); + string hash; + UTF8Encoding encoder = new UTF8Encoding(); + using (HMACSHA256 hmac = new HMACSHA256(encoder.GetBytes(key))) + { + Byte[] hmBytes = hmac.ComputeHash(encoder.GetBytes(value)); + hash = ToHexString(hmBytes); + } + return hash; } internal static bool s_SendEditorAnalytics = true; diff --git a/com.unity.ml-agents/Runtime/Analytics/Events.cs b/com.unity.ml-agents/Runtime/Analytics/Events.cs index f269d91436..4a34273c04 100644 --- a/com.unity.ml-agents/Runtime/Analytics/Events.cs +++ b/com.unity.ml-agents/Runtime/Analytics/Events.cs @@ -156,6 +156,7 @@ internal struct TrainingEnvironmentInitializedEvent public string TorchDeviceType; public int NumEnvironments; public int NumEnvironmentParameters; + public string RunOptions; } [Flags] @@ -188,5 +189,6 @@ internal struct TrainingBehaviorInitializedEvent public string VisualEncoder; public int NumNetworkLayers; public int NumNetworkHiddenUnits; + public string Config; } } diff --git a/com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs b/com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs index 4286aa844d..18d5fc62f8 100644 --- a/com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs +++ b/com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs @@ -156,7 +156,7 @@ IList actuators var inferenceEvent = new InferenceEvent(); // Hash 
the behavior name so that there's no concern about PII or "secret" data being leaked. - inferenceEvent.BehaviorName = AnalyticsUtils.Hash(behaviorName); + inferenceEvent.BehaviorName = AnalyticsUtils.Hash(k_VendorKey, behaviorName); inferenceEvent.BarracudaModelSource = barracudaModel.IrSource; inferenceEvent.BarracudaModelVersion = barracudaModel.IrVersion; diff --git a/com.unity.ml-agents/Runtime/Analytics/TrainingAnalytics.cs b/com.unity.ml-agents/Runtime/Analytics/TrainingAnalytics.cs index fb14e1bab7..1b12dcf5d4 100644 --- a/com.unity.ml-agents/Runtime/Analytics/TrainingAnalytics.cs +++ b/com.unity.ml-agents/Runtime/Analytics/TrainingAnalytics.cs @@ -192,8 +192,21 @@ internal static string ParseBehaviorName(string fullyQualifiedBehaviorName) return fullyQualifiedBehaviorName.Substring(0, lastQuestionIndex); } + internal static TrainingBehaviorInitializedEvent SanitizeTrainingBehaviorInitializedEvent(TrainingBehaviorInitializedEvent tbiEvent) + { + // Hash the behavior name if the message version is from an older version of ml-agents that doesn't do trainer-side hashing. + // We'll also, for extra safety, verify that the BehaviorName is the size of the expected SHA256 hash. + // Context: The config field was added at the same time as trainer side hashing, so messages including it should already be hashed. 
+ if (tbiEvent.Config.Length == 0 || tbiEvent.BehaviorName.Length != 64) + { + tbiEvent.BehaviorName = AnalyticsUtils.Hash(k_VendorKey, tbiEvent.BehaviorName); + } + + return tbiEvent; + } + [Conditional("MLA_UNITY_ANALYTICS_MODULE")] - public static void TrainingBehaviorInitialized(TrainingBehaviorInitializedEvent tbiEvent) + public static void TrainingBehaviorInitialized(TrainingBehaviorInitializedEvent rawTbiEvent) { #if UNITY_EDITOR && MLA_UNITY_ANALYTICS_MODULE if (!IsAnalyticsEnabled()) @@ -202,6 +215,7 @@ public static void TrainingBehaviorInitialized(TrainingBehaviorInitializedEvent if (!EnableAnalytics()) return; + var tbiEvent = SanitizeTrainingBehaviorInitializedEvent(rawTbiEvent); var behaviorName = tbiEvent.BehaviorName; var added = s_SentTrainingBehaviorInitialized.Add(behaviorName); @@ -211,9 +225,7 @@ public static void TrainingBehaviorInitialized(TrainingBehaviorInitializedEvent return; } - // Hash the behavior name so that there's no concern about PII or "secret" data being leaked. tbiEvent.TrainingSessionGuid = s_TrainingSessionGuid.ToString(); - tbiEvent.BehaviorName = AnalyticsUtils.Hash(tbiEvent.BehaviorName); // Note - to debug, use JsonUtility.ToJson on the event. // Debug.Log( @@ -236,7 +248,7 @@ IList actuators var remotePolicyEvent = new RemotePolicyInitializedEvent(); // Hash the behavior name so that there's no concern about PII or "secret" data being leaked. 
- remotePolicyEvent.BehaviorName = AnalyticsUtils.Hash(behaviorName); + remotePolicyEvent.BehaviorName = AnalyticsUtils.Hash(k_VendorKey, behaviorName); remotePolicyEvent.TrainingSessionGuid = s_TrainingSessionGuid.ToString(); remotePolicyEvent.ActionSpec = EventActionSpec.FromActionSpec(actionSpec); diff --git a/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs b/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs index 760d5e6b25..e49bd876e7 100644 --- a/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs +++ b/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs @@ -501,6 +501,7 @@ internal static TrainingEnvironmentInitializedEvent ToTrainingEnvironmentInitial TorchDeviceType = inputProto.TorchDeviceType, NumEnvironments = inputProto.NumEnvs, NumEnvironmentParameters = inputProto.NumEnvironmentParameters, + RunOptions = inputProto.RunOptions, }; } @@ -530,6 +531,7 @@ internal static TrainingBehaviorInitializedEvent ToTrainingBehaviorInitializedEv VisualEncoder = inputProto.VisualEncoder, NumNetworkLayers = inputProto.NumNetworkLayers, NumNetworkHiddenUnits = inputProto.NumNetworkHiddenUnits, + Config = inputProto.Config, }; } diff --git a/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/TrainingAnalytics.cs b/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/TrainingAnalytics.cs index 099563e949..042357f280 100644 --- a/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/TrainingAnalytics.cs +++ b/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/TrainingAnalytics.cs @@ -25,28 +25,29 @@ static TrainingAnalyticsReflection() { byte[] descriptorData = global::System.Convert.FromBase64String( string.Concat( "CjttbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL3RyYWluaW5n", - "X2FuYWx5dGljcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMi2QEKHlRy", + "X2FuYWx5dGljcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMi7gEKHlRy", "YWluaW5nRW52aXJvbm1lbnRJbml0aWFsaXplZBIYChBtbGFnZW50c192ZXJz", "aW9uGAEgASgJEh0KFW1sYWdlbnRzX2VudnNfdmVyc2lvbhgCIAEoCRIWCg5w", 
"eXRob25fdmVyc2lvbhgDIAEoCRIVCg10b3JjaF92ZXJzaW9uGAQgASgJEhkK", "EXRvcmNoX2RldmljZV90eXBlGAUgASgJEhAKCG51bV9lbnZzGAYgASgFEiIK", - "Gm51bV9lbnZpcm9ubWVudF9wYXJhbWV0ZXJzGAcgASgFIq0DChtUcmFpbmlu", - "Z0JlaGF2aW9ySW5pdGlhbGl6ZWQSFQoNYmVoYXZpb3JfbmFtZRgBIAEoCRIU", - "Cgx0cmFpbmVyX3R5cGUYAiABKAkSIAoYZXh0cmluc2ljX3Jld2FyZF9lbmFi", - "bGVkGAMgASgIEhsKE2dhaWxfcmV3YXJkX2VuYWJsZWQYBCABKAgSIAoYY3Vy", - "aW9zaXR5X3Jld2FyZF9lbmFibGVkGAUgASgIEhoKEnJuZF9yZXdhcmRfZW5h", - "YmxlZBgGIAEoCBIiChpiZWhhdmlvcmFsX2Nsb25pbmdfZW5hYmxlZBgHIAEo", - "CBIZChFyZWN1cnJlbnRfZW5hYmxlZBgIIAEoCBIWCg52aXN1YWxfZW5jb2Rl", - "chgJIAEoCRIaChJudW1fbmV0d29ya19sYXllcnMYCiABKAUSIAoYbnVtX25l", - "dHdvcmtfaGlkZGVuX3VuaXRzGAsgASgFEhgKEHRyYWluZXJfdGhyZWFkZWQY", - "DCABKAgSGQoRc2VsZl9wbGF5X2VuYWJsZWQYDSABKAgSGgoSY3VycmljdWx1", - "bV9lbmFibGVkGA4gASgIQiWqAiJVbml0eS5NTEFnZW50cy5Db21tdW5pY2F0", - "b3JPYmplY3RzYgZwcm90bzM=")); + "Gm51bV9lbnZpcm9ubWVudF9wYXJhbWV0ZXJzGAcgASgFEhMKC3J1bl9vcHRp", + "b25zGAggASgJIr0DChtUcmFpbmluZ0JlaGF2aW9ySW5pdGlhbGl6ZWQSFQoN", + "YmVoYXZpb3JfbmFtZRgBIAEoCRIUCgx0cmFpbmVyX3R5cGUYAiABKAkSIAoY", + "ZXh0cmluc2ljX3Jld2FyZF9lbmFibGVkGAMgASgIEhsKE2dhaWxfcmV3YXJk", + "X2VuYWJsZWQYBCABKAgSIAoYY3VyaW9zaXR5X3Jld2FyZF9lbmFibGVkGAUg", + "ASgIEhoKEnJuZF9yZXdhcmRfZW5hYmxlZBgGIAEoCBIiChpiZWhhdmlvcmFs", + "X2Nsb25pbmdfZW5hYmxlZBgHIAEoCBIZChFyZWN1cnJlbnRfZW5hYmxlZBgI", + "IAEoCBIWCg52aXN1YWxfZW5jb2RlchgJIAEoCRIaChJudW1fbmV0d29ya19s", + "YXllcnMYCiABKAUSIAoYbnVtX25ldHdvcmtfaGlkZGVuX3VuaXRzGAsgASgF", + "EhgKEHRyYWluZXJfdGhyZWFkZWQYDCABKAgSGQoRc2VsZl9wbGF5X2VuYWJs", + "ZWQYDSABKAgSGgoSY3VycmljdWx1bV9lbmFibGVkGA4gASgIEg4KBmNvbmZp", + "ZxgPIAEoCUIlqgIiVW5pdHkuTUxBZ2VudHMuQ29tbXVuaWNhdG9yT2JqZWN0", + "c2IGcHJvdG8z")); descriptor = pbr::FileDescriptor.FromGeneratedCode(descriptorData, new pbr::FileDescriptor[] { }, new pbr::GeneratedClrTypeInfo(null, new pbr::GeneratedClrTypeInfo[] { - new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.TrainingEnvironmentInitialized), 
global::Unity.MLAgents.CommunicatorObjects.TrainingEnvironmentInitialized.Parser, new[]{ "MlagentsVersion", "MlagentsEnvsVersion", "PythonVersion", "TorchVersion", "TorchDeviceType", "NumEnvs", "NumEnvironmentParameters" }, null, null, null), - new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.TrainingBehaviorInitialized), global::Unity.MLAgents.CommunicatorObjects.TrainingBehaviorInitialized.Parser, new[]{ "BehaviorName", "TrainerType", "ExtrinsicRewardEnabled", "GailRewardEnabled", "CuriosityRewardEnabled", "RndRewardEnabled", "BehavioralCloningEnabled", "RecurrentEnabled", "VisualEncoder", "NumNetworkLayers", "NumNetworkHiddenUnits", "TrainerThreaded", "SelfPlayEnabled", "CurriculumEnabled" }, null, null, null) + new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.TrainingEnvironmentInitialized), global::Unity.MLAgents.CommunicatorObjects.TrainingEnvironmentInitialized.Parser, new[]{ "MlagentsVersion", "MlagentsEnvsVersion", "PythonVersion", "TorchVersion", "TorchDeviceType", "NumEnvs", "NumEnvironmentParameters", "RunOptions" }, null, null, null), + new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.TrainingBehaviorInitialized), global::Unity.MLAgents.CommunicatorObjects.TrainingBehaviorInitialized.Parser, new[]{ "BehaviorName", "TrainerType", "ExtrinsicRewardEnabled", "GailRewardEnabled", "CuriosityRewardEnabled", "RndRewardEnabled", "BehavioralCloningEnabled", "RecurrentEnabled", "VisualEncoder", "NumNetworkLayers", "NumNetworkHiddenUnits", "TrainerThreaded", "SelfPlayEnabled", "CurriculumEnabled", "Config" }, null, null, null) })); } #endregion @@ -85,6 +86,7 @@ public TrainingEnvironmentInitialized(TrainingEnvironmentInitialized other) : th torchDeviceType_ = other.torchDeviceType_; numEnvs_ = other.numEnvs_; numEnvironmentParameters_ = other.numEnvironmentParameters_; + runOptions_ = other.runOptions_; _unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields); } 
@@ -170,6 +172,17 @@ public int NumEnvironmentParameters { } } + /// Field number for the "run_options" field. + public const int RunOptionsFieldNumber = 8; + private string runOptions_ = ""; + [global::System.Diagnostics.DebuggerNonUserCodeAttribute] + public string RunOptions { + get { return runOptions_; } + set { + runOptions_ = pb::ProtoPreconditions.CheckNotNull(value, "value"); + } + } + [global::System.Diagnostics.DebuggerNonUserCodeAttribute] public override bool Equals(object other) { return Equals(other as TrainingEnvironmentInitialized); @@ -190,6 +203,7 @@ public bool Equals(TrainingEnvironmentInitialized other) { if (TorchDeviceType != other.TorchDeviceType) return false; if (NumEnvs != other.NumEnvs) return false; if (NumEnvironmentParameters != other.NumEnvironmentParameters) return false; + if (RunOptions != other.RunOptions) return false; return Equals(_unknownFields, other._unknownFields); } @@ -203,6 +217,7 @@ public override int GetHashCode() { if (TorchDeviceType.Length != 0) hash ^= TorchDeviceType.GetHashCode(); if (NumEnvs != 0) hash ^= NumEnvs.GetHashCode(); if (NumEnvironmentParameters != 0) hash ^= NumEnvironmentParameters.GetHashCode(); + if (RunOptions.Length != 0) hash ^= RunOptions.GetHashCode(); if (_unknownFields != null) { hash ^= _unknownFields.GetHashCode(); } @@ -244,6 +259,10 @@ public void WriteTo(pb::CodedOutputStream output) { output.WriteRawTag(56); output.WriteInt32(NumEnvironmentParameters); } + if (RunOptions.Length != 0) { + output.WriteRawTag(66); + output.WriteString(RunOptions); + } if (_unknownFields != null) { _unknownFields.WriteTo(output); } @@ -273,6 +292,9 @@ public int CalculateSize() { if (NumEnvironmentParameters != 0) { size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumEnvironmentParameters); } + if (RunOptions.Length != 0) { + size += 1 + pb::CodedOutputStream.ComputeStringSize(RunOptions); + } if (_unknownFields != null) { size += _unknownFields.CalculateSize(); } @@ -305,6 +327,9 @@ public void 
MergeFrom(TrainingEnvironmentInitialized other) { if (other.NumEnvironmentParameters != 0) { NumEnvironmentParameters = other.NumEnvironmentParameters; } + if (other.RunOptions.Length != 0) { + RunOptions = other.RunOptions; + } _unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields); } @@ -344,6 +369,10 @@ public void MergeFrom(pb::CodedInputStream input) { NumEnvironmentParameters = input.ReadInt32(); break; } + case 66: { + RunOptions = input.ReadString(); + break; + } } } } @@ -389,6 +418,7 @@ public TrainingBehaviorInitialized(TrainingBehaviorInitialized other) : this() { trainerThreaded_ = other.trainerThreaded_; selfPlayEnabled_ = other.selfPlayEnabled_; curriculumEnabled_ = other.curriculumEnabled_; + config_ = other.config_; _unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields); } @@ -551,6 +581,17 @@ public bool CurriculumEnabled { } } + /// Field number for the "config" field. + public const int ConfigFieldNumber = 15; + private string config_ = ""; + [global::System.Diagnostics.DebuggerNonUserCodeAttribute] + public string Config { + get { return config_; } + set { + config_ = pb::ProtoPreconditions.CheckNotNull(value, "value"); + } + } + [global::System.Diagnostics.DebuggerNonUserCodeAttribute] public override bool Equals(object other) { return Equals(other as TrainingBehaviorInitialized); @@ -578,6 +619,7 @@ public bool Equals(TrainingBehaviorInitialized other) { if (TrainerThreaded != other.TrainerThreaded) return false; if (SelfPlayEnabled != other.SelfPlayEnabled) return false; if (CurriculumEnabled != other.CurriculumEnabled) return false; + if (Config != other.Config) return false; return Equals(_unknownFields, other._unknownFields); } @@ -598,6 +640,7 @@ public override int GetHashCode() { if (TrainerThreaded != false) hash ^= TrainerThreaded.GetHashCode(); if (SelfPlayEnabled != false) hash ^= SelfPlayEnabled.GetHashCode(); if (CurriculumEnabled != false) hash ^= CurriculumEnabled.GetHashCode(); + if 
(Config.Length != 0) hash ^= Config.GetHashCode(); if (_unknownFields != null) { hash ^= _unknownFields.GetHashCode(); } @@ -667,6 +710,10 @@ public void WriteTo(pb::CodedOutputStream output) { output.WriteRawTag(112); output.WriteBool(CurriculumEnabled); } + if (Config.Length != 0) { + output.WriteRawTag(122); + output.WriteString(Config); + } if (_unknownFields != null) { _unknownFields.WriteTo(output); } @@ -717,6 +764,9 @@ public int CalculateSize() { if (CurriculumEnabled != false) { size += 1 + 1; } + if (Config.Length != 0) { + size += 1 + pb::CodedOutputStream.ComputeStringSize(Config); + } if (_unknownFields != null) { size += _unknownFields.CalculateSize(); } @@ -770,6 +820,9 @@ public void MergeFrom(TrainingBehaviorInitialized other) { if (other.CurriculumEnabled != false) { CurriculumEnabled = other.CurriculumEnabled; } + if (other.Config.Length != 0) { + Config = other.Config; + } _unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields); } @@ -837,6 +890,10 @@ public void MergeFrom(pb::CodedInputStream input) { CurriculumEnabled = input.ReadBool(); break; } + case 122: { + Config = input.ReadString(); + break; + } } } } diff --git a/com.unity.ml-agents/Tests/Editor/Analytics/TrainingAnalyticsTest.cs b/com.unity.ml-agents/Tests/Editor/Analytics/TrainingAnalyticsTest.cs index 1010b4549b..0487a7a524 100644 --- a/com.unity.ml-agents/Tests/Editor/Analytics/TrainingAnalyticsTest.cs +++ b/com.unity.ml-agents/Tests/Editor/Analytics/TrainingAnalyticsTest.cs @@ -70,6 +70,19 @@ public void TestRemotePolicy() Academy.Instance.Dispose(); } + [TestCase("a name we expect to hash", ExpectedResult = "d084a8b6da6a6a1c097cdc9ffea95e1546da4647352113ed77cbe7b4192e6d73")] + [TestCase("another_name", ExpectedResult = "0b74613c872e79aba11e06eda3538f2b646eb2b459e75087829ea500bd703d0b")] + [TestCase("0b74613c872e79aba11e06eda3538f2b646eb2b459e75087829ea500bd703d0b", ExpectedResult = "0b74613c872e79aba11e06eda3538f2b646eb2b459e75087829ea500bd703d0b")] 
+ public string TestTrainingBehaviorInitialized(string stringToMaybeHash) + { + var tbiEvent = new TrainingBehaviorInitializedEvent(); + tbiEvent.BehaviorName = stringToMaybeHash; + tbiEvent.Config = "{}"; + + var sanitizedEvent = TrainingAnalytics.SanitizeTrainingBehaviorInitializedEvent(tbiEvent); + return sanitizedEvent.BehaviorName; + } + [Test] public void TestEnableAnalytics() { diff --git a/ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.py b/ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.py index 1e775c9710..2701dac858 100644 --- a/ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.py +++ b/ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.py @@ -19,7 +19,7 @@ name='mlagents_envs/communicator_objects/training_analytics.proto', package='communicator_objects', syntax='proto3', - serialized_pb=_b('\n;mlagents_envs/communicator_objects/training_analytics.proto\x12\x14\x63ommunicator_objects\"\xd9\x01\n\x1eTrainingEnvironmentInitialized\x12\x18\n\x10mlagents_version\x18\x01 \x01(\t\x12\x1d\n\x15mlagents_envs_version\x18\x02 \x01(\t\x12\x16\n\x0epython_version\x18\x03 \x01(\t\x12\x15\n\rtorch_version\x18\x04 \x01(\t\x12\x19\n\x11torch_device_type\x18\x05 \x01(\t\x12\x10\n\x08num_envs\x18\x06 \x01(\x05\x12\"\n\x1anum_environment_parameters\x18\x07 \x01(\x05\"\xad\x03\n\x1bTrainingBehaviorInitialized\x12\x15\n\rbehavior_name\x18\x01 \x01(\t\x12\x14\n\x0ctrainer_type\x18\x02 \x01(\t\x12 \n\x18\x65xtrinsic_reward_enabled\x18\x03 \x01(\x08\x12\x1b\n\x13gail_reward_enabled\x18\x04 \x01(\x08\x12 \n\x18\x63uriosity_reward_enabled\x18\x05 \x01(\x08\x12\x1a\n\x12rnd_reward_enabled\x18\x06 \x01(\x08\x12\"\n\x1a\x62\x65havioral_cloning_enabled\x18\x07 \x01(\x08\x12\x19\n\x11recurrent_enabled\x18\x08 \x01(\x08\x12\x16\n\x0evisual_encoder\x18\t \x01(\t\x12\x1a\n\x12num_network_layers\x18\n \x01(\x05\x12 \n\x18num_network_hidden_units\x18\x0b 
\x01(\x05\x12\x18\n\x10trainer_threaded\x18\x0c \x01(\x08\x12\x19\n\x11self_play_enabled\x18\r \x01(\x08\x12\x1a\n\x12\x63urriculum_enabled\x18\x0e \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3') + serialized_pb=_b('\n;mlagents_envs/communicator_objects/training_analytics.proto\x12\x14\x63ommunicator_objects\"\xee\x01\n\x1eTrainingEnvironmentInitialized\x12\x18\n\x10mlagents_version\x18\x01 \x01(\t\x12\x1d\n\x15mlagents_envs_version\x18\x02 \x01(\t\x12\x16\n\x0epython_version\x18\x03 \x01(\t\x12\x15\n\rtorch_version\x18\x04 \x01(\t\x12\x19\n\x11torch_device_type\x18\x05 \x01(\t\x12\x10\n\x08num_envs\x18\x06 \x01(\x05\x12\"\n\x1anum_environment_parameters\x18\x07 \x01(\x05\x12\x13\n\x0brun_options\x18\x08 \x01(\t\"\xbd\x03\n\x1bTrainingBehaviorInitialized\x12\x15\n\rbehavior_name\x18\x01 \x01(\t\x12\x14\n\x0ctrainer_type\x18\x02 \x01(\t\x12 \n\x18\x65xtrinsic_reward_enabled\x18\x03 \x01(\x08\x12\x1b\n\x13gail_reward_enabled\x18\x04 \x01(\x08\x12 \n\x18\x63uriosity_reward_enabled\x18\x05 \x01(\x08\x12\x1a\n\x12rnd_reward_enabled\x18\x06 \x01(\x08\x12\"\n\x1a\x62\x65havioral_cloning_enabled\x18\x07 \x01(\x08\x12\x19\n\x11recurrent_enabled\x18\x08 \x01(\x08\x12\x16\n\x0evisual_encoder\x18\t \x01(\t\x12\x1a\n\x12num_network_layers\x18\n \x01(\x05\x12 \n\x18num_network_hidden_units\x18\x0b \x01(\x05\x12\x18\n\x10trainer_threaded\x18\x0c \x01(\x08\x12\x19\n\x11self_play_enabled\x18\r \x01(\x08\x12\x1a\n\x12\x63urriculum_enabled\x18\x0e \x01(\x08\x12\x0e\n\x06\x63onfig\x18\x0f \x01(\tB%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3') ) @@ -81,6 +81,13 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='run_options', full_name='communicator_objects.TrainingEnvironmentInitialized.run_options', index=7, + number=8, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + 
message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), ], extensions=[ ], @@ -94,7 +101,7 @@ oneofs=[ ], serialized_start=86, - serialized_end=303, + serialized_end=324, ) @@ -203,6 +210,13 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='config', full_name='communicator_objects.TrainingBehaviorInitialized.config', index=14, + number=15, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None, file=DESCRIPTOR), ], extensions=[ ], @@ -215,8 +229,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=306, - serialized_end=735, + serialized_start=327, + serialized_end=772, ) DESCRIPTOR.message_types_by_name['TrainingEnvironmentInitialized'] = _TRAININGENVIRONMENTINITIALIZED diff --git a/ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.pyi b/ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.pyi index a347de6874..53709ca8e8 100644 --- a/ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.pyi +++ b/ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.pyi @@ -33,6 +33,7 @@ class TrainingEnvironmentInitialized(google___protobuf___message___Message): torch_device_type = ... # type: typing___Text num_envs = ... # type: builtin___int num_environment_parameters = ... # type: builtin___int + run_options = ... 
# type: typing___Text def __init__(self, *, @@ -43,15 +44,16 @@ class TrainingEnvironmentInitialized(google___protobuf___message___Message): torch_device_type : typing___Optional[typing___Text] = None, num_envs : typing___Optional[builtin___int] = None, num_environment_parameters : typing___Optional[builtin___int] = None, + run_options : typing___Optional[typing___Text] = None, ) -> None: ... @classmethod def FromString(cls, s: builtin___bytes) -> TrainingEnvironmentInitialized: ... def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... if sys.version_info >= (3,): - def ClearField(self, field_name: typing_extensions___Literal[u"mlagents_envs_version",u"mlagents_version",u"num_environment_parameters",u"num_envs",u"python_version",u"torch_device_type",u"torch_version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"mlagents_envs_version",u"mlagents_version",u"num_environment_parameters",u"num_envs",u"python_version",u"run_options",u"torch_device_type",u"torch_version"]) -> None: ... else: - def ClearField(self, field_name: typing_extensions___Literal[u"mlagents_envs_version",b"mlagents_envs_version",u"mlagents_version",b"mlagents_version",u"num_environment_parameters",b"num_environment_parameters",u"num_envs",b"num_envs",u"python_version",b"python_version",u"torch_device_type",b"torch_device_type",u"torch_version",b"torch_version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"mlagents_envs_version",b"mlagents_envs_version",u"mlagents_version",b"mlagents_version",u"num_environment_parameters",b"num_environment_parameters",u"num_envs",b"num_envs",u"python_version",b"python_version",u"run_options",b"run_options",u"torch_device_type",b"torch_device_type",u"torch_version",b"torch_version"]) -> None: ... 
class TrainingBehaviorInitialized(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... @@ -69,6 +71,7 @@ class TrainingBehaviorInitialized(google___protobuf___message___Message): trainer_threaded = ... # type: builtin___bool self_play_enabled = ... # type: builtin___bool curriculum_enabled = ... # type: builtin___bool + config = ... # type: typing___Text def __init__(self, *, @@ -86,12 +89,13 @@ class TrainingBehaviorInitialized(google___protobuf___message___Message): trainer_threaded : typing___Optional[builtin___bool] = None, self_play_enabled : typing___Optional[builtin___bool] = None, curriculum_enabled : typing___Optional[builtin___bool] = None, + config : typing___Optional[typing___Text] = None, ) -> None: ... @classmethod def FromString(cls, s: builtin___bytes) -> TrainingBehaviorInitialized: ... def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... if sys.version_info >= (3,): - def ClearField(self, field_name: typing_extensions___Literal[u"behavior_name",u"behavioral_cloning_enabled",u"curiosity_reward_enabled",u"curriculum_enabled",u"extrinsic_reward_enabled",u"gail_reward_enabled",u"num_network_hidden_units",u"num_network_layers",u"recurrent_enabled",u"rnd_reward_enabled",u"self_play_enabled",u"trainer_threaded",u"trainer_type",u"visual_encoder"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"behavior_name",u"behavioral_cloning_enabled",u"config",u"curiosity_reward_enabled",u"curriculum_enabled",u"extrinsic_reward_enabled",u"gail_reward_enabled",u"num_network_hidden_units",u"num_network_layers",u"recurrent_enabled",u"rnd_reward_enabled",u"self_play_enabled",u"trainer_threaded",u"trainer_type",u"visual_encoder"]) -> None: ... 
else: - def ClearField(self, field_name: typing_extensions___Literal[u"behavior_name",b"behavior_name",u"behavioral_cloning_enabled",b"behavioral_cloning_enabled",u"curiosity_reward_enabled",b"curiosity_reward_enabled",u"curriculum_enabled",b"curriculum_enabled",u"extrinsic_reward_enabled",b"extrinsic_reward_enabled",u"gail_reward_enabled",b"gail_reward_enabled",u"num_network_hidden_units",b"num_network_hidden_units",u"num_network_layers",b"num_network_layers",u"recurrent_enabled",b"recurrent_enabled",u"rnd_reward_enabled",b"rnd_reward_enabled",u"self_play_enabled",b"self_play_enabled",u"trainer_threaded",b"trainer_threaded",u"trainer_type",b"trainer_type",u"visual_encoder",b"visual_encoder"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"behavior_name",b"behavior_name",u"behavioral_cloning_enabled",b"behavioral_cloning_enabled",u"config",b"config",u"curiosity_reward_enabled",b"curiosity_reward_enabled",u"curriculum_enabled",b"curriculum_enabled",u"extrinsic_reward_enabled",b"extrinsic_reward_enabled",u"gail_reward_enabled",b"gail_reward_enabled",u"num_network_hidden_units",b"num_network_hidden_units",u"num_network_layers",b"num_network_layers",u"recurrent_enabled",b"recurrent_enabled",u"rnd_reward_enabled",b"rnd_reward_enabled",u"self_play_enabled",b"self_play_enabled",u"trainer_threaded",b"trainer_threaded",u"trainer_type",b"trainer_type",u"visual_encoder",b"visual_encoder"]) -> None: ... 
diff --git a/ml-agents/mlagents/trainers/subprocess_env_manager.py b/ml-agents/mlagents/trainers/subprocess_env_manager.py index e96703f62c..736d7b5091 100644 --- a/ml-agents/mlagents/trainers/subprocess_env_manager.py +++ b/ml-agents/mlagents/trainers/subprocess_env_manager.py @@ -37,7 +37,9 @@ EnvironmentStats, StatsSideChannel, ) -from mlagents.training_analytics_side_channel import TrainingAnalyticsSideChannel +from mlagents.trainers.training_analytics_side_channel import ( + TrainingAnalyticsSideChannel, +) from mlagents_envs.side_channel.side_channel import SideChannel diff --git a/ml-agents/mlagents/trainers/tests/test_training_analytics_side_channel.py b/ml-agents/mlagents/trainers/tests/test_training_analytics_side_channel.py new file mode 100644 index 0000000000..0dcba85d1f --- /dev/null +++ b/ml-agents/mlagents/trainers/tests/test_training_analytics_side_channel.py @@ -0,0 +1,48 @@ +import yaml +from mlagents.trainers.settings import RunOptions +from mlagents.trainers.training_analytics_side_channel import ( + TrainingAnalyticsSideChannel, +) + +test_curriculum_config_yaml = """ +environment_parameters: + param_1: + curriculum: + - name: Lesson1 + completion_criteria: + measure: reward + behavior: fake_behavior + threshold: 30 + min_lesson_length: 100 + require_reset: true + value: 1 + - name: Lesson2 + completion_criteria: + measure: reward + behavior: fake_behavior + threshold: 60 + min_lesson_length: 100 + require_reset: false + value: 2 + - name: Lesson3 + value: + sampler_type: uniform + sampler_parameters: + min_value: 1 + max_value: 3 +""" + + +def test_sanitize_run_options(): + run_options = RunOptions.from_dict(yaml.safe_load(test_curriculum_config_yaml)) + sanitized = TrainingAnalyticsSideChannel._sanitize_run_options(run_options) + assert "param_1" not in sanitized["environment_parameters"] + assert "fake_behavior" not in sanitized["environment_parameters"] + assert ( + TrainingAnalyticsSideChannel._hash("param_1") + in 
sanitized["environment_parameters"] + ) + level1 = TrainingAnalyticsSideChannel._hash("param_1") + assert sanitized["environment_parameters"][level1]["curriculum"][0][ + "completion_criteria" + ]["behavior"] == TrainingAnalyticsSideChannel._hash("fake_behavior") diff --git a/ml-agents/mlagents/training_analytics_side_channel.py b/ml-agents/mlagents/trainers/training_analytics_side_channel.py similarity index 51% rename from ml-agents/mlagents/training_analytics_side_channel.py rename to ml-agents/mlagents/trainers/training_analytics_side_channel.py index b2adfbc438..dd6d99ea74 100644 --- a/ml-agents/mlagents/training_analytics_side_channel.py +++ b/ml-agents/mlagents/trainers/training_analytics_side_channel.py @@ -1,5 +1,9 @@ +import copy +import json +import hmac +import hashlib import sys -from typing import Optional +from typing import Optional, Dict import mlagents_envs import mlagents.trainers from mlagents import torch_utils @@ -24,6 +28,8 @@ class TrainingAnalyticsSideChannel(DefaultTrainingAnalyticsSideChannel): Side channel that sends information about the training to the Unity environment so it can be logged. """ + __vendorKey: str = "unity.ml-agents" + def __init__(self) -> None: # >>> uuid.uuid5(uuid.NAMESPACE_URL, "com.unity.ml-agents/TrainingAnalyticsSideChannel") # UUID('b664a4a9-d86f-5a5f-95cb-e8353a7e8356') @@ -31,17 +37,82 @@ def __init__(self) -> None: super().__init__() self.run_options: Optional[RunOptions] = None + @classmethod + def _hash(cls, data: str) -> str: + res = hmac.new( + cls.__vendorKey.encode("utf-8"), data.encode("utf-8"), hashlib.sha256 + ).hexdigest() + return res + def on_message_received(self, msg: IncomingMessage) -> None: raise UnityCommunicationException( "The TrainingAnalyticsSideChannel received a message from Unity, " + "this should not have happened." 
) + @classmethod + def _sanitize_run_options(cls, config: RunOptions) -> Dict[str, Any]: + res = copy.deepcopy(config.as_dict()) + + # Filter potentially PII behavior names + if "behaviors" in res and res["behaviors"]: + res["behaviors"] = {cls._hash(k): v for (k, v) in res["behaviors"].items()} + for (k, v) in res["behaviors"].items(): + if "init_path" in v and v["init_path"] is not None: + hashed_path = cls._hash(v["init_path"]) + res["behaviors"][k]["init_path"] = hashed_path + if "demo_path" in v and v["demo_path"] is not None: + hashed_path = cls._hash(v["demo_path"]) + res["behaviors"][k]["demo_path"] = hashed_path + + # Filter potentially PII curriculum and behavior names from Checkpoint Settings + if "environment_parameters" in res and res["environment_parameters"]: + res["environment_parameters"] = { + cls._hash(k): v for (k, v) in res["environment_parameters"].items() + } + for (curriculumName, curriculum) in res["environment_parameters"].items(): + updated_lessons = [] + for lesson in curriculum["curriculum"]: + new_lesson = copy.deepcopy(lesson) + if "name" in lesson: + new_lesson["name"] = cls._hash(lesson["name"]) + if ( + "completion_criteria" in lesson + and lesson["completion_criteria"] is not None + ): + new_lesson["completion_criteria"]["behavior"] = cls._hash( + new_lesson["completion_criteria"]["behavior"] + ) + updated_lessons.append(new_lesson) + res["environment_parameters"][curriculumName][ + "curriculum" + ] = updated_lessons + + # Filter potentially PII filenames from Checkpoint Settings + if "checkpoint_settings" in res and res["checkpoint_settings"] is not None: + if ( + "initialize_from" in res["checkpoint_settings"] + and res["checkpoint_settings"]["initialize_from"] is not None + ): + res["checkpoint_settings"]["initialize_from"] = cls._hash( + res["checkpoint_settings"]["initialize_from"] + ) + if ( + "results_dir" in res["checkpoint_settings"] + and res["checkpoint_settings"]["results_dir"] is not None + ): + 
res["checkpoint_settings"]["results_dir"] = cls._hash( + res["checkpoint_settings"]["results_dir"] + ) + + return res + + def environment_initialized(self, run_options: RunOptions) -> None: self.run_options = run_options # Tuple of (major, minor, patch) vi = sys.version_info env_params = run_options.environment_parameters + sanitized_run_options = self._sanitize_run_options(run_options) msg = TrainingEnvironmentInitialized( python_version=f"{vi[0]}.{vi[1]}.{vi[2]}", @@ -51,6 +122,7 @@ def environment_initialized(self, run_options: RunOptions) -> None: torch_device_type=torch_utils.default_device().type, num_envs=run_options.env_settings.num_envs, num_environment_parameters=len(env_params) if env_params else 0, + run_options=json.dumps(sanitized_run_options), ) any_message = Any() @@ -60,9 +132,21 @@ def environment_initialized(self, run_options: RunOptions) -> None: env_init_msg.set_raw_bytes(any_message.SerializeToString()) super().queue_message_to_send(env_init_msg) + @classmethod + def _sanitize_trainer_settings(cls, config: TrainerSettings) -> Dict[str, Any]: + config_dict = copy.deepcopy(config.as_dict()) + if "init_path" in config_dict and config_dict["init_path"] is not None: + hashed_path = cls._hash(config_dict["init_path"]) + config_dict["init_path"] = hashed_path + if "demo_path" in config_dict and config_dict["demo_path"] is not None: + hashed_path = cls._hash(config_dict["demo_path"]) + config_dict["demo_path"] = hashed_path + return config_dict + def training_started(self, behavior_name: str, config: TrainerSettings) -> None: + raw_config = self._sanitize_trainer_settings(config) msg = TrainingBehaviorInitialized( - behavior_name=behavior_name, + behavior_name=self._hash(behavior_name), trainer_type=config.trainer_type.value, extrinsic_reward_enabled=( RewardSignalType.EXTRINSIC in config.reward_signals @@ -80,6 +164,7 @@ def training_started(self, behavior_name: str, config: TrainerSettings) -> None: trainer_threaded=config.threaded, 
self_play_enabled=config.self_play is not None, curriculum_enabled=self._behavior_uses_curriculum(behavior_name), + config=json.dumps(raw_config), ) any_message = Any() diff --git a/protobuf-definitions/proto/mlagents_envs/communicator_objects/training_analytics.proto b/protobuf-definitions/proto/mlagents_envs/communicator_objects/training_analytics.proto index 52f0ed6109..08905c516b 100644 --- a/protobuf-definitions/proto/mlagents_envs/communicator_objects/training_analytics.proto +++ b/protobuf-definitions/proto/mlagents_envs/communicator_objects/training_analytics.proto @@ -11,6 +11,7 @@ message TrainingEnvironmentInitialized { string torch_device_type = 5; int32 num_envs = 6; int32 num_environment_parameters = 7; + string run_options = 8; } message TrainingBehaviorInitialized { @@ -28,4 +29,5 @@ message TrainingBehaviorInitialized { bool trainer_threaded = 12; bool self_play_enabled = 13; bool curriculum_enabled = 14; + string config = 15; }