From d0f7054b9aaab1bfaa3e3722ae2a576e1af9bea0 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Wed, 21 Oct 2020 13:01:48 -0700 Subject: [PATCH 01/26] Use ctx.CalncelExecution() to fix AutoML max-time experiment bug --- .../Experiment/Experiment.cs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index c845dca14f..509a208d17 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -7,6 +7,7 @@ using System.Diagnostics; using System.IO; using System.Linq; +using System.Timers; using Microsoft.ML.Runtime; namespace Microsoft.ML.AutoML @@ -51,10 +52,23 @@ public Experiment(MLContext context, _logger = logger; } + private void MaxExperimentTimeExpiredEvent(object sender, EventArgs e) + { + _logger.Warning("Allocated time for Experiment of {0} seconds has elapsed with {1} models run. Ending experiment...", + _experimentSettings.MaxExperimentTimeInSeconds, _history.Count()); + _context.CancelExecution(); + } + public IList Execute() { - var stopwatch = Stopwatch.StartNew(); var iterationResults = new List(); + // Create a timer for the max duration of experiment. When given time has + // elapsed, MaxExperimentTimeExpiredEvent is called to interrupt training + // of current model. + Timer timer = new Timer(_experimentSettings.MaxExperimentTimeInSeconds * 1000); + timer.Elapsed += MaxExperimentTimeExpiredEvent; + timer.AutoReset = false; + timer.Enabled = true; do { @@ -100,8 +114,7 @@ public IList Execute() } } while (_history.Count < _experimentSettings.MaxModels && - !_experimentSettings.CancellationToken.IsCancellationRequested && - stopwatch.Elapsed.TotalSeconds < _experimentSettings.MaxExperimentTimeInSeconds); + !_experimentSettings.CancellationToken.IsCancellationRequested); return iterationResults; } From 4fa26f82676718172c7f24e15b84a9bcbdb25635 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Wed, 21 Oct 2020 15:32:47 -0700 Subject: [PATCH 02/26] Added unit test for checking canceled experiment --- .../Experiment/Experiment.cs | 35 ++++++++++++++----- .../Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 18 ++++++++++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 509a208d17..76e3ad27bd 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -26,6 +26,7 @@ internal class Experiment where TRunDetail : RunDetail private readonly IRunner _runner; private readonly IList _history; private readonly IChannel _logger; + private bool _endExperimentWhenAble = false; public Experiment(MLContext context, TaskKind task, @@ -54,9 +55,15 @@ public Experiment(MLContext context, private void MaxExperimentTimeExpiredEvent(object sender, EventArgs e) { - _logger.Warning("Allocated time for Experiment of {0} seconds has elapsed with {1} models run. Ending experiment...", - _experimentSettings.MaxExperimentTimeInSeconds, _history.Count()); - _context.CancelExecution(); + // If at least one model was run, end experiment immediately. + // Else, wait for first model to run before experiment is concluded. + _endExperimentWhenAble = true; + if (_history.Count > 0) + { + _logger.Warning("Allocated time for Experiment of {0} seconds has elapsed with {1} models run. Ending experiment...", + _experimentSettings.MaxExperimentTimeInSeconds, _history.Count()); + _context.CancelExecution(); + } } public IList Execute() @@ -64,11 +71,20 @@ public IList Execute() var iterationResults = new List(); // Create a timer for the max duration of experiment. When given time has // elapsed, MaxExperimentTimeExpiredEvent is called to interrupt training - // of current model. - Timer timer = new Timer(_experimentSettings.MaxExperimentTimeInSeconds * 1000); - timer.Elapsed += MaxExperimentTimeExpiredEvent; - timer.AutoReset = false; - timer.Enabled = true; + // of current model. Timer is not used if no experiment time is given, or + // is not a positive number. + if (_experimentSettings.MaxExperimentTimeInSeconds > 0) + { + Timer timer = new Timer(_experimentSettings.MaxExperimentTimeInSeconds * 1000); + timer.Elapsed += MaxExperimentTimeExpiredEvent; + timer.AutoReset = false; + timer.Enabled = true; + } + // If given max duration of experiment is 0, only 1 model will be trained. + // _experimentSettings.MaxExperimentTimeInSeconds is of type uint, it is + // either 0 or >0. + else + _endExperimentWhenAble = true; do { @@ -114,7 +130,8 @@ public IList Execute() } } while (_history.Count < _experimentSettings.MaxModels && - !_experimentSettings.CancellationToken.IsCancellationRequested); + !_experimentSettings.CancellationToken.IsCancellationRequested && + !_endExperimentWhenAble); return iterationResults; } diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 40ccfdc067..c28417015f 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -4,6 +4,7 @@ using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Runtime; using Microsoft.ML.TestFramework; using Microsoft.ML.TestFramework.Attributes; using Microsoft.ML.TestFrameworkCommon; @@ -320,6 +321,23 @@ public void AutoFitWithPresplittedData() } + [Fact] + public void AutoFitMaxExperimentTimeTest() + { + // 1 Binary classification experiment takes less than 5 seconds. + // System.OperationCanceledException is thrown when ongoing experiment + // is canceled and at least one model has been generated. + var context = new MLContext(1); + var dataPath = DatasetUtil.GetUciAdultDataset(); + var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel); + var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions); + var trainData = textLoader.Load(dataPath); + var experiment = context.Auto() + .CreateBinaryClassificationExperiment(5) + .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel }); + Assert.True((context.Model.GetEnvironment() as ICancelable).IsCanceled); + } + private TextLoader.Options GetLoaderArgs(string labelColumnName, string userIdColumnName, string itemIdColumnName) { return new TextLoader.Options() From 48a626701950e70a014376ab8baaa26fb09ecf99 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Wed, 21 Oct 2020 16:05:11 -0700 Subject: [PATCH 03/26] Nit fix --- src/Microsoft.ML.AutoML/Experiment/Experiment.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 76e3ad27bd..319dd0342b 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -26,7 +26,7 @@ internal class Experiment where TRunDetail : RunDetail private readonly IRunner _runner; private readonly IList _history; private readonly IChannel _logger; - private bool _endExperimentWhenAble = false; + private bool _endExperimentWhenAble; public Experiment(MLContext context, TaskKind task, @@ -51,6 +51,7 @@ public Experiment(MLContext context, _datasetColumnInfo = datasetColumnInfo; _runner = runner; _logger = logger; + _endExperimentWhenAble = false; } private void MaxExperimentTimeExpiredEvent(object sender, EventArgs e) From f32403015c3fe8c0c06dae00de8838cbb04b675a Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Wed, 21 Oct 2020 17:08:52 -0700 Subject: [PATCH 04/26] Different run time on Linux --- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index c28417015f..5aa10dc235 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -235,7 +235,7 @@ public void AutoFitRecommendationTest() // STEP 2: Run AutoML experiment ExperimentResult experimentResult = mlContext.Auto() - .CreateRecommendationExperiment(5) + .CreateRecommendationExperiment(25) .Execute(trainDataView, testDataView, new ColumnInformation() { From ee70024182950faa23495a9dbf42828e446400ee Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Wed, 21 Oct 2020 22:46:23 -0700 Subject: [PATCH 05/26] Review --- src/Microsoft.ML.AutoML/Experiment/Experiment.cs | 2 +- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 319dd0342b..3d3a687bae 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -59,7 +59,7 @@ private void MaxExperimentTimeExpiredEvent(object sender, EventArgs e) // If at least one model was run, end experiment immediately. // Else, wait for first model to run before experiment is concluded. _endExperimentWhenAble = true; - if (_history.Count > 0) + if (_history.Any(r => r.RunSucceeded)) { _logger.Warning("Allocated time for Experiment of {0} seconds has elapsed with {1} models run. Ending experiment...", _experimentSettings.MaxExperimentTimeInSeconds, _history.Count()); diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 5aa10dc235..a0f2e1d469 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -235,7 +235,7 @@ public void AutoFitRecommendationTest() // STEP 2: Run AutoML experiment ExperimentResult experimentResult = mlContext.Auto() - .CreateRecommendationExperiment(25) + .CreateRecommendationExperiment(50) .Execute(trainDataView, testDataView, new ColumnInformation() { From 36bf24e3059334bd1f7862a90f62061e687fdf73 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Thu, 22 Oct 2020 16:36:22 -0700 Subject: [PATCH 06/26] Testing four ouput --- src/Microsoft.ML.Core/Data/IHostEnvironment.cs | 2 +- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.Core/Data/IHostEnvironment.cs b/src/Microsoft.ML.Core/Data/IHostEnvironment.cs index f59a37bef6..e010cf335e 100644 --- a/src/Microsoft.ML.Core/Data/IHostEnvironment.cs +++ b/src/Microsoft.ML.Core/Data/IHostEnvironment.cs @@ -72,7 +72,7 @@ public interface IHostEnvironment : IChannelProvider, IProgressChannelProvider internal interface ICancelable { /// - /// Signal to stop exection in all the hosts. + /// Signal to stop execution in all the hosts. /// void CancelExecution(); diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index a0f2e1d469..0f43426686 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -118,7 +118,7 @@ public void AutoFitRegressionTest() .Execute(trainData, validationData, new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel }); - Assert.True(result.RunDetails.Max(i => i.ValidationMetrics.RSquared > 0.9)); + Assert.True(result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) > 0.9); } [LightGBMFact] @@ -235,7 +235,7 @@ public void AutoFitRecommendationTest() // STEP 2: Run AutoML experiment ExperimentResult experimentResult = mlContext.Auto() - .CreateRecommendationExperiment(50) + .CreateRecommendationExperiment(5) .Execute(trainDataView, testDataView, new ColumnInformation() { @@ -247,7 +247,8 @@ public void AutoFitRecommendationTest() RunDetail bestRun = experimentResult.BestRun; Assert.True(experimentResult.RunDetails.Count() > 1); Assert.NotNull(bestRun.ValidationMetrics); - Assert.True(experimentResult.RunDetails.Max(i => i.ValidationMetrics.RSquared != 0)); + System.Console.WriteLine("Number of models run successfully/total-tried: {0}", experimentResult.RunDetails.Select(r => r.ValidationMetrics != null && r.ValidationMetrics.RSquared != double.NaN).Count(), experimentResult.RunDetails.Count()); + Assert.True(experimentResult.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) != 0); var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema); var expectedOutputNames = new string[] { labelColumnName, userColumnName, userColumnName, itemColumnName, itemColumnName, scoreColumnName }; From d5d23deeb53f923b80fd030cbf34d7d4022df469 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Thu, 22 Oct 2020 20:19:00 -0700 Subject: [PATCH 07/26] Used reflection to test for contexts being canceled --- .../Experiment/Experiment.cs | 26 +++++++++++++------ .../Runners/CrossValSummaryRunner.cs | 2 +- .../Experiment/SuggestedPipeline.cs | 5 ++++ .../Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 19 +++++++++++--- 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 3d3a687bae..4eca529c99 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -8,6 +8,7 @@ using System.IO; using System.Linq; using System.Timers; +using Microsoft.ML.Data; using Microsoft.ML.Runtime; namespace Microsoft.ML.AutoML @@ -26,7 +27,8 @@ internal class Experiment where TRunDetail : RunDetail private readonly IRunner _runner; private readonly IList _history; private readonly IChannel _logger; - private bool _endExperimentWhenAble; + private bool _experimentTimerExpired; + private HashSet _activeMLContexts; public Experiment(MLContext context, TaskKind task, @@ -51,20 +53,23 @@ public Experiment(MLContext context, _datasetColumnInfo = datasetColumnInfo; _runner = runner; _logger = logger; - _endExperimentWhenAble = false; + _experimentTimerExpired = false; + _activeMLContexts = new HashSet(); } private void MaxExperimentTimeExpiredEvent(object sender, EventArgs e) { // If at least one model was run, end experiment immediately. // Else, wait for first model to run before experiment is concluded. - _endExperimentWhenAble = true; + _experimentTimerExpired = true; if (_history.Any(r => r.RunSucceeded)) { _logger.Warning("Allocated time for Experiment of {0} seconds has elapsed with {1} models run. Ending experiment...", _experimentSettings.MaxExperimentTimeInSeconds, _history.Count()); - _context.CancelExecution(); + foreach(MLContext c in _activeMLContexts) + c.CancelExecution(); } + _activeMLContexts.Clear(); } public IList Execute() @@ -85,7 +90,7 @@ public IList Execute() // _experimentSettings.MaxExperimentTimeInSeconds is of type uint, it is // either 0 or >0. else - _endExperimentWhenAble = true; + _experimentTimerExpired = true; do { @@ -93,7 +98,13 @@ public IList Execute() // get next pipeline var getPipelineStopwatch = Stopwatch.StartNew(); - var pipeline = PipelineSuggester.GetNextInferredPipeline(_context, _history, _datasetColumnInfo, _task, + + // A new MLContext is needed per model run. When max experiment time is reached, each used + // context is canceled to stop further model training. The cancellation of the main MLContext + // a user has instantiated is not desirable, thus additional MLContexts are used. + var activeMLContext = new MLContext(((ISeededEnvironment)_context.Model.GetEnvironment()).Seed); + _activeMLContexts.Add(activeMLContext); + var pipeline = PipelineSuggester.GetNextInferredPipeline(activeMLContext, _history, _datasetColumnInfo, _task, _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _trainerAllowList); var pipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds; @@ -132,8 +143,7 @@ public IList Execute() } while (_history.Count < _experimentSettings.MaxModels && !_experimentSettings.CancellationToken.IsCancellationRequested && - !_endExperimentWhenAble); - + !_experimentTimerExpired); return iterationResults; } diff --git a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs index 3697268936..04accc4754 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValSummaryRunner.cs @@ -58,7 +58,7 @@ public CrossValSummaryRunner(MLContext context, for (var i = 0; i < _trainDatasets.Length; i++) { var modelFileInfo = RunnerUtil.GetModelFileInfo(modelDirectory, iterationNum, i + 1); - var trainResult = RunnerUtil.TrainAndScorePipeline(_context, pipeline, _trainDatasets[i], _validDatasets[i], + var trainResult = RunnerUtil.TrainAndScorePipeline(pipeline.GetContext(), pipeline, _trainDatasets[i], _validDatasets[i], _groupIdColumn, _labelColumn, _metricsAgent, _preprocessorTransforms?.ElementAt(i), modelFileInfo, _modelInputSchema, _logger); trainResults.Add(trainResult); diff --git a/src/Microsoft.ML.AutoML/Experiment/SuggestedPipeline.cs b/src/Microsoft.ML.AutoML/Experiment/SuggestedPipeline.cs index ff97bd9cee..3c35118a76 100644 --- a/src/Microsoft.ML.AutoML/Experiment/SuggestedPipeline.cs +++ b/src/Microsoft.ML.AutoML/Experiment/SuggestedPipeline.cs @@ -52,6 +52,11 @@ public override int GetHashCode() return ToString().GetHashCode(); } + public MLContext GetContext() + { + return _context; + } + public Pipeline ToPipeline() { var pipelineElements = new List(); diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 0f43426686..00b237ce72 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -2,13 +2,14 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Linq; +using System.Reflection; using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.TestFramework; using Microsoft.ML.TestFramework.Attributes; using Microsoft.ML.TestFrameworkCommon; -using Microsoft.ML.Trainers.LightGbm; using Xunit; using Xunit.Abstractions; using static Microsoft.ML.DataOperationsCatalog; @@ -247,7 +248,6 @@ public void AutoFitRecommendationTest() RunDetail bestRun = experimentResult.BestRun; Assert.True(experimentResult.RunDetails.Count() > 1); Assert.NotNull(bestRun.ValidationMetrics); - System.Console.WriteLine("Number of models run successfully/total-tried: {0}", experimentResult.RunDetails.Select(r => r.ValidationMetrics != null && r.ValidationMetrics.RSquared != double.NaN).Count(), experimentResult.RunDetails.Count()); Assert.True(experimentResult.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) != 0); var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema); @@ -336,7 +336,20 @@ public void AutoFitMaxExperimentTimeTest() var experiment = context.Auto() .CreateBinaryClassificationExperiment(5) .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel }); - Assert.True((context.Model.GetEnvironment() as ICancelable).IsCanceled); + RunDetail[] runDetails = experiment.RunDetails.Where(r => r.Model != null).ToArray(); + foreach(RunDetail runDetail in runDetails) + { + ModelContainer modelContainer = GetInstanceField(typeof(RunDetail), runDetail, "_modelContainer") as ModelContainer; + MLContext thisContext = GetInstanceField(typeof(ModelContainer), modelContainer, "_mlContext") as MLContext; + Assert.True((thisContext.Model.GetEnvironment() as ICancelable).IsCanceled); + } + } + + private static object GetInstanceField(Type type, object instance, string fieldName) + { + BindingFlags bindFlags = BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static; + FieldInfo field = type.GetField(fieldName, bindFlags); + return field.GetValue(instance); } private TextLoader.Options GetLoaderArgs(string labelColumnName, string userIdColumnName, string itemIdColumnName) From 33cf5a6219f9510a7b76bdd3062f124a82dd9500 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Mon, 26 Oct 2020 02:02:27 -0700 Subject: [PATCH 08/26] Reviews --- .../Experiment/Experiment.cs | 14 +++------ .../Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 29 +++++++++---------- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 4eca529c99..d371362069 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -28,7 +28,7 @@ internal class Experiment where TRunDetail : RunDetail private readonly IList _history; private readonly IChannel _logger; private bool _experimentTimerExpired; - private HashSet _activeMLContexts; + private MLContext _currentModelMLContext; public Experiment(MLContext context, TaskKind task, @@ -54,7 +54,6 @@ public Experiment(MLContext context, _runner = runner; _logger = logger; _experimentTimerExpired = false; - _activeMLContexts = new HashSet(); } private void MaxExperimentTimeExpiredEvent(object sender, EventArgs e) @@ -66,10 +65,8 @@ private void MaxExperimentTimeExpiredEvent(object sender, EventArgs e) { _logger.Warning("Allocated time for Experiment of {0} seconds has elapsed with {1} models run. Ending experiment...", _experimentSettings.MaxExperimentTimeInSeconds, _history.Count()); - foreach(MLContext c in _activeMLContexts) - c.CancelExecution(); + _currentModelMLContext.CancelExecution(); } - _activeMLContexts.Clear(); } public IList Execute() @@ -102,13 +99,10 @@ public IList Execute() // A new MLContext is needed per model run. When max experiment time is reached, each used // context is canceled to stop further model training. The cancellation of the main MLContext // a user has instantiated is not desirable, thus additional MLContexts are used. - var activeMLContext = new MLContext(((ISeededEnvironment)_context.Model.GetEnvironment()).Seed); - _activeMLContexts.Add(activeMLContext); - var pipeline = PipelineSuggester.GetNextInferredPipeline(activeMLContext, _history, _datasetColumnInfo, _task, + _currentModelMLContext = new MLContext(((ISeededEnvironment)_context.Model.GetEnvironment()).Seed); + var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task, _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _trainerAllowList); - var pipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds; - // break if no candidates returned, means no valid pipeline available if (pipeline == null) { diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 00b237ce72..d86eedd27d 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -248,7 +248,7 @@ public void AutoFitRecommendationTest() RunDetail bestRun = experimentResult.BestRun; Assert.True(experimentResult.RunDetails.Count() > 1); Assert.NotNull(bestRun.ValidationMetrics); - Assert.True(experimentResult.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) != 0); + Assert.True(experimentResult.RunDetails.Max(i => i?.ValidationMetrics?.RSquared* i?.ValidationMetrics?.RSquared) > 0.5); var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema); var expectedOutputNames = new string[] { labelColumnName, userColumnName, userColumnName, itemColumnName, itemColumnName, scoreColumnName }; @@ -325,7 +325,7 @@ public void AutoFitWithPresplittedData() [Fact] public void AutoFitMaxExperimentTimeTest() { - // 1 Binary classification experiment takes less than 5 seconds. + // A single binary classification experiment takes less than 5 seconds. // System.OperationCanceledException is thrown when ongoing experiment // is canceled and at least one model has been generated. var context = new MLContext(1); @@ -334,22 +334,19 @@ public void AutoFitMaxExperimentTimeTest() var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions); var trainData = textLoader.Load(dataPath); var experiment = context.Auto() - .CreateBinaryClassificationExperiment(5) + .CreateBinaryClassificationExperiment(10) .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel }); - RunDetail[] runDetails = experiment.RunDetails.Where(r => r.Model != null).ToArray(); - foreach(RunDetail runDetail in runDetails) - { - ModelContainer modelContainer = GetInstanceField(typeof(RunDetail), runDetail, "_modelContainer") as ModelContainer; - MLContext thisContext = GetInstanceField(typeof(ModelContainer), modelContainer, "_mlContext") as MLContext; - Assert.True((thisContext.Model.GetEnvironment() as ICancelable).IsCanceled); - } - } - private static object GetInstanceField(Type type, object instance, string fieldName) - { - BindingFlags bindFlags = BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static; - FieldInfo field = type.GetField(fieldName, bindFlags); - return field.GetValue(instance); + // Ensure the (last) model that was training when maximum experiment time was reached has been stopped, + // and that its MLContext has been canceled. + Assert.True(experiment.RunDetails.Last().Exception.Message == "Operation was canceled.", + "Training process was not successfully canceled after maximum experiment time was reached."); + + // Ensure that the best found model can still run after maximum experiment time was reached. + var refitModel = experiment.BestRun.Estimator.Fit(trainData); + IDataView predictions = refitModel.Transform(trainData); + var metrics = context.BinaryClassification.Evaluate(predictions, labelColumnName: DatasetUtil.UciAdultLabel); + Assert.True(metrics?.Accuracy > 0.5); } private TextLoader.Options GetLoaderArgs(string labelColumnName, string userIdColumnName, string itemIdColumnName) From c69a19f931977c7bd4cc5d4a26e8878182612f4a Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Wed, 28 Oct 2020 14:51:50 -0700 Subject: [PATCH 09/26] Reviews --- src/Microsoft.ML.AutoML/Experiment/Experiment.cs | 12 ++++++------ test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index d371362069..524d6e7bd7 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -7,7 +7,7 @@ using System.Diagnostics; using System.IO; using System.Linq; -using System.Timers; +using System.Threading; using Microsoft.ML.Data; using Microsoft.ML.Runtime; @@ -56,7 +56,7 @@ public Experiment(MLContext context, _experimentTimerExpired = false; } - private void MaxExperimentTimeExpiredEvent(object sender, EventArgs e) + private void MaxExperimentTimeExpiredEvent(object state) { // If at least one model was run, end experiment immediately. // Else, wait for first model to run before experiment is concluded. @@ -78,10 +78,10 @@ public IList Execute() // is not a positive number. if (_experimentSettings.MaxExperimentTimeInSeconds > 0) { - Timer timer = new Timer(_experimentSettings.MaxExperimentTimeInSeconds * 1000); - timer.Elapsed += MaxExperimentTimeExpiredEvent; - timer.AutoReset = false; - timer.Enabled = true; + Timer timer = new Timer( + new TimerCallback(MaxExperimentTimeExpiredEvent), null, + _experimentSettings.MaxExperimentTimeInSeconds * 1000, Timeout.Infinite + ); } // If given max duration of experiment is 0, only 1 model will be trained. // _experimentSettings.MaxExperimentTimeInSeconds is of type uint, it is diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index d86eedd27d..273fa38243 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -339,7 +339,7 @@ public void AutoFitMaxExperimentTimeTest() // Ensure the (last) model that was training when maximum experiment time was reached has been stopped, // and that its MLContext has been canceled. - Assert.True(experiment.RunDetails.Last().Exception.Message == "Operation was canceled.", + Assert.True(experiment.RunDetails.Last().Exception.Message.Contains("Operation was canceled"), "Training process was not successfully canceled after maximum experiment time was reached."); // Ensure that the best found model can still run after maximum experiment time was reached. From 299b05b5fcde6a1df490f7288738896a1fc47b72 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Wed, 28 Oct 2020 17:41:43 -0700 Subject: [PATCH 10/26] Added main MLContext listener-timer --- .../Experiment/Experiment.cs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 524d6e7bd7..7e0a4443f7 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -69,6 +69,16 @@ private void MaxExperimentTimeExpiredEvent(object state) } } + private void MainContextCanceledEvent(object state) + { + // If the main MLContext is canceled, cancel the ongoing model training and MLContext. + if ((_context.Model.GetEnvironment() as ICancelable).IsCanceled) + { + _logger.Warning("Main MLContext has been canceled. Ending experiment..."); + _currentModelMLContext.CancelExecution(); + } + } + public IList Execute() { var iterationResults = new List(); @@ -78,7 +88,7 @@ public IList Execute() // is not a positive number. if (_experimentSettings.MaxExperimentTimeInSeconds > 0) { - Timer timer = new Timer( + Timer maxExperimentTimeTimer = new Timer( new TimerCallback(MaxExperimentTimeExpiredEvent), null, _experimentSettings.MaxExperimentTimeInSeconds * 1000, Timeout.Infinite ); @@ -88,6 +98,11 @@ public IList Execute() // either 0 or >0. else _experimentTimerExpired = true; + // Add second timer to check for the cancelation signal from the main MLContext + // to the active child MLContext. This timer will propagate the cancelation + // signal from the main to the child MLContexs if the main MLContext is + // canceled. + Timer mainContextCanceledTimer = new Timer(new TimerCallback(MainContextCanceledEvent), null, 1000, 1000); do { From ce747fbbd410ce3408b3a2c10a1c3b22e8587a97 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Thu, 29 Oct 2020 17:27:37 -0700 Subject: [PATCH 11/26] Added PRNG on _context, held onto timers for avoiding GC --- .../Experiment/Experiment.cs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 7e0a4443f7..5995fbdb03 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -27,8 +27,11 @@ internal class Experiment where TRunDetail : RunDetail private readonly IRunner _runner; private readonly IList _history; private readonly IChannel _logger; + private Timer _maxExperimentTimeTimer; + private Timer _mainContextCanceledTimer; private bool _experimentTimerExpired; private MLContext _currentModelMLContext; + private Random _newContextSeedGenerator; public Experiment(MLContext context, TaskKind task, @@ -70,14 +73,14 @@ private void MaxExperimentTimeExpiredEvent(object state) } private void MainContextCanceledEvent(object state) - { + { // If the main MLContext is canceled, cancel the ongoing model training and MLContext. if ((_context.Model.GetEnvironment() as ICancelable).IsCanceled) { _logger.Warning("Main MLContext has been canceled. Ending experiment..."); _currentModelMLContext.CancelExecution(); } - } + } public IList Execute() { @@ -88,7 +91,7 @@ public IList Execute() // is not a positive number. if (_experimentSettings.MaxExperimentTimeInSeconds > 0) { - Timer maxExperimentTimeTimer = new Timer( + _maxExperimentTimeTimer = new Timer( new TimerCallback(MaxExperimentTimeExpiredEvent), null, _experimentSettings.MaxExperimentTimeInSeconds * 1000, Timeout.Infinite ); @@ -102,7 +105,12 @@ public IList Execute() // to the active child MLContext. This timer will propagate the cancelation // signal from the main to the child MLContexs if the main MLContext is // canceled. - Timer mainContextCanceledTimer = new Timer(new TimerCallback(MainContextCanceledEvent), null, 1000, 1000); + _mainContextCanceledTimer = new Timer(new TimerCallback(MainContextCanceledEvent), null, 1000, 1000); + + // Pseudo random number generator to result in deterministic runs with the provided main MLContext's seed and to + // maintain variability between training iterations. + int? mainContextSeed = ((ISeededEnvironment)_context.Model.GetEnvironment()).Seed; + _newContextSeedGenerator = (mainContextSeed.HasValue) ? RandomUtils.Create(mainContextSeed.Value) : RandomUtils.Create(); do { @@ -114,7 +122,7 @@ public IList Execute() // A new MLContext is needed per model run. When max experiment time is reached, each used // context is canceled to stop further model training. The cancellation of the main MLContext // a user has instantiated is not desirable, thus additional MLContexts are used. - _currentModelMLContext = new MLContext(((ISeededEnvironment)_context.Model.GetEnvironment()).Seed); + _currentModelMLContext = new MLContext(_newContextSeedGenerator.Next()); var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task, _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _trainerAllowList); From 763550058198a7b12375925a5b5b781658cb837a Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Thu, 29 Oct 2020 22:14:54 -0700 Subject: [PATCH 12/26] Addressed reviews --- src/Microsoft.ML.AutoML/Experiment/Experiment.cs | 3 ++- src/Microsoft.ML.AutoML/Experiment/SuggestedPipeline.cs | 4 ++-- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 8 +++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 5995fbdb03..4dee1aded2 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -101,6 +101,7 @@ public IList Execute() // either 0 or >0. else _experimentTimerExpired = true; + // Add second timer to check for the cancelation signal from the main MLContext // to the active child MLContext. This timer will propagate the cancelation // signal from the main to the child MLContexs if the main MLContext is @@ -122,7 +123,7 @@ public IList Execute() // A new MLContext is needed per model run. When max experiment time is reached, each used // context is canceled to stop further model training. The cancellation of the main MLContext // a user has instantiated is not desirable, thus additional MLContexts are used. - _currentModelMLContext = new MLContext(_newContextSeedGenerator.Next()); + _currentModelMLContext = _newContextSeedGenerator == null ? new MLContext() : new MLContext(_newContextSeedGenerator.Next()); var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task, _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _trainerAllowList); diff --git a/src/Microsoft.ML.AutoML/Experiment/SuggestedPipeline.cs b/src/Microsoft.ML.AutoML/Experiment/SuggestedPipeline.cs index 3c35118a76..aeaa72a4de 100644 --- a/src/Microsoft.ML.AutoML/Experiment/SuggestedPipeline.cs +++ b/src/Microsoft.ML.AutoML/Experiment/SuggestedPipeline.cs @@ -53,9 +53,9 @@ public override int GetHashCode() } public MLContext GetContext() - { + { return _context; - } + } public Pipeline ToPipeline() { diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 273fa38243..cef4b33c6f 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -167,7 +167,7 @@ public void AutoFitRankingTest() Assert.True(experimentResults[i].RunDetails.Count() > 0); Assert.NotNull(bestRun.ValidationMetrics); Assert.True(bestRun.ValidationMetrics.NormalizedDiscountedCumulativeGains.Last() > 0.4); - Assert.True(bestRun.ValidationMetrics.DiscountedCumulativeGains.Last() > 20); + Assert.True(bestRun.ValidationMetrics.DiscountedCumulativeGains.Last() > 19); var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema); var expectedOutputNames = new string[] { labelColumnName, groupIdColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB, "Features", scoreColumnName }; @@ -345,8 +345,10 @@ public void AutoFitMaxExperimentTimeTest() // Ensure that the best found model can still run after maximum experiment time was reached. var refitModel = experiment.BestRun.Estimator.Fit(trainData); IDataView predictions = refitModel.Transform(trainData); - var metrics = context.BinaryClassification.Evaluate(predictions, labelColumnName: DatasetUtil.UciAdultLabel); - Assert.True(metrics?.Accuracy > 0.5); + var prev = predictions.Preview(); + Assert.Equal(30, predictions.Schema.Count); + Assert.True(predictions.Schema.GetColumnOrNull("PredictedLabel").HasValue); + Assert.True(predictions.Schema.GetColumnOrNull("Score").HasValue); } private TextLoader.Options GetLoaderArgs(string labelColumnName, string userIdColumnName, string itemIdColumnName) From 94a80de176534c04ba2fabd63f5c31214caf8a4e Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Thu, 29 Oct 2020 23:23:36 -0700 Subject: [PATCH 13/26] Unit test edits --- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index cef4b33c6f..eaed3a422c 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -334,7 +334,7 @@ public void AutoFitMaxExperimentTimeTest() var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions); var trainData = textLoader.Load(dataPath); var experiment = context.Auto() - .CreateBinaryClassificationExperiment(10) + .CreateBinaryClassificationExperiment(30) .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel }); // Ensure the (last) model that was training when maximum experiment time was reached has been stopped, @@ -343,12 +343,9 @@ public void AutoFitMaxExperimentTimeTest() "Training process was not successfully canceled after maximum experiment time was reached."); // Ensure that the best found model can still run after maximum experiment time was reached. - var refitModel = experiment.BestRun.Estimator.Fit(trainData); - IDataView predictions = refitModel.Transform(trainData); - var prev = predictions.Preview(); - Assert.Equal(30, predictions.Schema.Count); - Assert.True(predictions.Schema.GetColumnOrNull("PredictedLabel").HasValue); - Assert.True(predictions.Schema.GetColumnOrNull("Score").HasValue); + IDataView predictions = experiment.BestRun.Model.Transform(trainData); + var metrics = context.BinaryClassification.Evaluate(predictions, labelColumnName: DatasetUtil.UciAdultLabel); + Assert.True(metrics?.Accuracy > 0.5); } private TextLoader.Options GetLoaderArgs(string labelColumnName, string userIdColumnName, string itemIdColumnName) From abe1d7f0290fca2dca1dff83da3d83a2e4d89e95 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Fri, 30 Oct 2020 01:26:33 -0700 Subject: [PATCH 14/26] Increase run time of experiment to guarantee probabilities --- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index eaed3a422c..eb063e7c01 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -334,7 +334,7 @@ public void AutoFitMaxExperimentTimeTest() var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions); var trainData = textLoader.Load(dataPath); var experiment = context.Auto() - .CreateBinaryClassificationExperiment(30) + .CreateBinaryClassificationExperiment(60) .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel }); // Ensure the (last) model that was training when maximum experiment time was reached has been stopped, From 9585a50d2326e6e383f212dab5c03357e586e470 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Fri, 30 Oct 2020 02:06:39 -0700 Subject: [PATCH 15/26] Edited unit test to check produced schema of next run model's predictions --- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index eb063e7c01..3db558c4e8 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -344,8 +344,7 @@ public void AutoFitMaxExperimentTimeTest() // Ensure that the best found model can still run after maximum experiment time was reached. IDataView predictions = experiment.BestRun.Model.Transform(trainData); - var metrics = context.BinaryClassification.Evaluate(predictions, labelColumnName: DatasetUtil.UciAdultLabel); - Assert.True(metrics?.Accuracy > 0.5); + Assert.True(predictions.Schema.Count >= 30); } private TextLoader.Options GetLoaderArgs(string labelColumnName, string userIdColumnName, string itemIdColumnName) From 1ab662f80712a4ccb52e636706a7d60777498889 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Fri, 30 Oct 2020 03:04:05 -0700 Subject: [PATCH 16/26] Remove scheme check as different CI builds result in varying schemas --- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 3db558c4e8..1359faa7d7 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -344,7 +344,6 @@ public void AutoFitMaxExperimentTimeTest() // Ensure that the best found model can still run after maximum experiment time was reached. IDataView predictions = experiment.BestRun.Model.Transform(trainData); - Assert.True(predictions.Schema.Count >= 30); } private TextLoader.Options GetLoaderArgs(string labelColumnName, string userIdColumnName, string itemIdColumnName) From bc9e578bea736ab9bbafdea20ee61a5db469914c Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Fri, 30 Oct 2020 03:49:22 -0700 Subject: [PATCH 17/26] Decrease max experiment time unit test time --- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 1359faa7d7..d8cd87607e 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -334,7 +334,7 @@ public void AutoFitMaxExperimentTimeTest() var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions); var trainData = textLoader.Load(dataPath); var experiment = context.Auto() - .CreateBinaryClassificationExperiment(60) + .CreateBinaryClassificationExperiment(15) .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel }); // Ensure the (last) model that was training when maximum experiment time was reached has been stopped, From 2d8d06f5acd2545500ca429173280e0a37b2db6a Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Mon, 2 Nov 2020 11:11:04 -0800 Subject: [PATCH 18/26] Added Timers --- .../Experiment/Experiment.cs | 19 +++++++++++-------- .../Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index cddac8278b..07f9690010 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -7,7 +7,7 @@ using System.Diagnostics; using System.IO; using System.Linq; -using System.Threading; +using System.Timers; using Microsoft.ML.Data; using Microsoft.ML.Runtime; @@ -59,7 +59,7 @@ public Experiment(MLContext context, _experimentTimerExpired = false; } - private void MaxExperimentTimeExpiredEvent(object state) + private void MaxExperimentTimeExpiredEvent(Object source, ElapsedEventArgs e) { // If at least one model was run, end experiment immediately. // Else, wait for first model to run before experiment is concluded. @@ -72,7 +72,7 @@ private void MaxExperimentTimeExpiredEvent(object state) } } - private void MainContextCanceledEvent(object state) + private void MainContextCanceledEvent(Object source, ElapsedEventArgs e) { // If the main MLContext is canceled, cancel the ongoing model training and MLContext. if ((_context.Model.GetEnvironment() as ICancelable).IsCanceled) @@ -91,10 +91,10 @@ public IList Execute() // is not a positive number. if (_experimentSettings.MaxExperimentTimeInSeconds > 0) { - _maxExperimentTimeTimer = new Timer( - new TimerCallback(MaxExperimentTimeExpiredEvent), null, - _experimentSettings.MaxExperimentTimeInSeconds * 1000, Timeout.Infinite - ); + _maxExperimentTimeTimer = new Timer(_experimentSettings.MaxExperimentTimeInSeconds * 1000); + _maxExperimentTimeTimer.Elapsed += MaxExperimentTimeExpiredEvent; + _maxExperimentTimeTimer.AutoReset = false; + _maxExperimentTimeTimer.Enabled = true; } // If given max duration of experiment is 0, only 1 model will be trained. // _experimentSettings.MaxExperimentTimeInSeconds is of type uint, it is @@ -106,7 +106,10 @@ public IList Execute() // to the active child MLContext. This timer will propagate the cancelation // signal from the main to the child MLContexs if the main MLContext is // canceled. - _mainContextCanceledTimer = new Timer(new TimerCallback(MainContextCanceledEvent), null, 1000, 1000); + _mainContextCanceledTimer = new Timer(1000); + _mainContextCanceledTimer.Elapsed += MainContextCanceledEvent; + _mainContextCanceledTimer.AutoReset = true; + _mainContextCanceledTimer.Enabled = true; // Pseudo random number generator to result in deterministic runs with the provided main MLContext's seed and to // maintain variability between training iterations. diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 9897e4a965..c62e7c3de9 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -122,7 +122,7 @@ public void AutoFitRegressionTest(string culture) // Furthermore, these issues might only occur after ~70 // iterations, so more experiment time is needed for this to // occur. - uint experimentTime = (uint) (culture == "en-US" ? 0 : 360); + uint experimentTime = (uint) (culture == "en-US" ? 0 : 180); var experimentSettings = new RegressionExperimentSettings { MaxExperimentTimeInSeconds = experimentTime}; if (!Environment.Is64BitProcess) From 490d8c1573cbb198885b0db94cd97f899386c9cf Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Mon, 2 Nov 2020 12:37:41 -0800 Subject: [PATCH 19/26] Increase second timer time, edit unit test --- src/Microsoft.ML.AutoML/Experiment/Experiment.cs | 2 +- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 07f9690010..f5730b5db1 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -106,7 +106,7 @@ public IList Execute() // to the active child MLContext. This timer will propagate the cancelation // signal from the main to the child MLContexs if the main MLContext is // canceled. - _mainContextCanceledTimer = new Timer(1000); + _mainContextCanceledTimer = new Timer(3000); _mainContextCanceledTimer.Elapsed += MainContextCanceledEvent; _mainContextCanceledTimer.AutoReset = true; _mainContextCanceledTimer.Enabled = true; diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index c62e7c3de9..2a7994d024 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -130,6 +130,8 @@ public void AutoFitRegressionTest(string culture) // LightGBM isn't available on x86 machines experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm); } + // FastForest takes too long compared to other trainers + experimentSettings.Trainers.Remove(RegressionTrainer.FastForest); var context = new MLContext(1); var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset(); @@ -269,7 +271,7 @@ public void AutoFitRecommendationTest() // STEP 2: Run AutoML experiment ExperimentResult experimentResult = mlContext.Auto() - .CreateRecommendationExperiment(5) + .CreateRecommendationExperiment(8) .Execute(trainDataView, testDataView, new ColumnInformation() { From b0de1d310f10754afdc6d5adc012a971fa0f5096 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Mon, 2 Nov 2020 16:45:42 -0800 Subject: [PATCH 20/26] Added try catch for OperationCanceledException in Execute() --- .../Experiment/Experiment.cs | 111 ++++++++++-------- .../Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 4 +- 2 files changed, 60 insertions(+), 55 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index f5730b5db1..3dcd428de2 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -7,7 +7,7 @@ using System.Diagnostics; using System.IO; using System.Linq; -using System.Timers; +using System.Threading; using Microsoft.ML.Data; using Microsoft.ML.Runtime; @@ -59,7 +59,7 @@ public Experiment(MLContext context, _experimentTimerExpired = false; } - private void MaxExperimentTimeExpiredEvent(Object source, ElapsedEventArgs e) + private void MaxExperimentTimeExpiredEvent(object state) { // If at least one model was run, end experiment immediately. // Else, wait for first model to run before experiment is concluded. @@ -72,7 +72,7 @@ private void MaxExperimentTimeExpiredEvent(Object source, ElapsedEventArgs e) } } - private void MainContextCanceledEvent(Object source, ElapsedEventArgs e) + private void MainContextCanceledEvent(object state) { // If the main MLContext is canceled, cancel the ongoing model training and MLContext. if ((_context.Model.GetEnvironment() as ICancelable).IsCanceled) @@ -91,10 +91,10 @@ public IList Execute() // is not a positive number. if (_experimentSettings.MaxExperimentTimeInSeconds > 0) { - _maxExperimentTimeTimer = new Timer(_experimentSettings.MaxExperimentTimeInSeconds * 1000); - _maxExperimentTimeTimer.Elapsed += MaxExperimentTimeExpiredEvent; - _maxExperimentTimeTimer.AutoReset = false; - _maxExperimentTimeTimer.Enabled = true; + _maxExperimentTimeTimer = new Timer( + new TimerCallback(MaxExperimentTimeExpiredEvent), null, + _experimentSettings.MaxExperimentTimeInSeconds * 1000, Timeout.Infinite + ); } // If given max duration of experiment is 0, only 1 model will be trained. // _experimentSettings.MaxExperimentTimeInSeconds is of type uint, it is @@ -106,62 +106,69 @@ public IList Execute() // to the active child MLContext. This timer will propagate the cancelation // signal from the main to the child MLContexs if the main MLContext is // canceled. - _mainContextCanceledTimer = new Timer(3000); - _mainContextCanceledTimer.Elapsed += MainContextCanceledEvent; - _mainContextCanceledTimer.AutoReset = true; - _mainContextCanceledTimer.Enabled = true; + _mainContextCanceledTimer = new Timer(new TimerCallback(MainContextCanceledEvent), null, 1000, 1000); // Pseudo random number generator to result in deterministic runs with the provided main MLContext's seed and to // maintain variability between training iterations. - int? mainContextSeed = ((ISeededEnvironment)_context.Model.GetEnvironment()).Seed; + int ? mainContextSeed = ((ISeededEnvironment)_context.Model.GetEnvironment()).Seed; _newContextSeedGenerator = (mainContextSeed.HasValue) ? RandomUtils.Create(mainContextSeed.Value) : RandomUtils.Create(); do { - var iterationStopwatch = Stopwatch.StartNew(); - - // get next pipeline - var getPipelineStopwatch = Stopwatch.StartNew(); - - // A new MLContext is needed per model run. When max experiment time is reached, each used - // context is canceled to stop further model training. The cancellation of the main MLContext - // a user has instantiated is not desirable, thus additional MLContexts are used. - _currentModelMLContext = _newContextSeedGenerator == null ? new MLContext() : new MLContext(_newContextSeedGenerator.Next()); - var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task, - _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _logger, _trainerAllowList); - - // break if no candidates returned, means no valid pipeline available - if (pipeline == null) - { - break; - } - - // evaluate pipeline - _logger.Trace($"Evaluating pipeline {pipeline.ToString()}"); - (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail) - = _runner.Run(pipeline, _modelDirectory, _history.Count + 1); - - _history.Add(suggestedPipelineRunDetail); - WriteIterationLog(pipeline, suggestedPipelineRunDetail, iterationStopwatch); - - runDetail.RuntimeInSeconds = iterationStopwatch.Elapsed.TotalSeconds; - runDetail.PipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds; - - ReportProgress(runDetail); - iterationResults.Add(runDetail); - - // if model is perfect, break - if (_metricsAgent.IsModelPerfect(suggestedPipelineRunDetail.Score)) + try { - break; + var iterationStopwatch = Stopwatch.StartNew(); + + // get next pipeline + var getPipelineStopwatch = Stopwatch.StartNew(); + + // A new MLContext is needed per model run. When max experiment time is reached, each used + // context is canceled to stop further model training. The cancellation of the main MLContext + // a user has instantiated is not desirable, thus additional MLContexts are used. + _currentModelMLContext = _newContextSeedGenerator == null ? new MLContext() : new MLContext(_newContextSeedGenerator.Next()); + var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task, + _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _logger, _trainerAllowList); + // break if no candidates returned, means no valid pipeline available + if (pipeline == null) + { + break; + } + + // evaluate pipeline + _logger.Trace($"Evaluating pipeline {pipeline.ToString()}"); + (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail) + = _runner.Run(pipeline, _modelDirectory, _history.Count + 1); + + _history.Add(suggestedPipelineRunDetail); + WriteIterationLog(pipeline, suggestedPipelineRunDetail, iterationStopwatch); + + runDetail.RuntimeInSeconds = iterationStopwatch.Elapsed.TotalSeconds; + runDetail.PipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds; + + ReportProgress(runDetail); + iterationResults.Add(runDetail); + + // if model is perfect, break + if (_metricsAgent.IsModelPerfect(suggestedPipelineRunDetail.Score)) + { + break; + } + + // If after third run, all runs have failed so far, throw exception + if (_history.Count() == 3 && _history.All(r => !r.RunSucceeded)) + { + throw new InvalidOperationException($"Training failed with the exception: {_history.Last().Exception}"); + } } - - // If after third run, all runs have failed so far, throw exception - if (_history.Count() == 3 && _history.All(r => !r.RunSucceeded)) + catch (OperationCanceledException e) { - throw new InvalidOperationException($"Training failed with the exception: {_history.Last().Exception}"); + // This exception is thrown when the IHost/MLContext of the trainer is canceled due to + // reaching maximum experiment time. Simply catch this exception and return finished + // iteration results. + _logger.Warning("OperationCanceledException has been caught after maximum experiment time" + + "was reached, and the running MLContext was stopped. Details: {0}", e.Message); + return iterationResults; } - } while (_history.Count < _experimentSettings.MaxModels && !_experimentSettings.CancellationToken.IsCancellationRequested && !_experimentTimerExpired); diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 2a7994d024..c62e7c3de9 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -130,8 +130,6 @@ public void AutoFitRegressionTest(string culture) // LightGBM isn't available on x86 machines experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm); } - // FastForest takes too long compared to other trainers - experimentSettings.Trainers.Remove(RegressionTrainer.FastForest); var context = new MLContext(1); var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset(); @@ -271,7 +269,7 @@ public void AutoFitRecommendationTest() // STEP 2: Run AutoML experiment ExperimentResult experimentResult = mlContext.Auto() - .CreateRecommendationExperiment(8) + .CreateRecommendationExperiment(5) .Execute(trainDataView, testDataView, new ColumnInformation() { From 0918afa0da9fcd3bee058dea6f96114ddc6632c4 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Tue, 3 Nov 2020 01:50:11 -0800 Subject: [PATCH 21/26] Add AggregateException try catch to slow unit tests for parallel testing --- .../Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 92 +++++++++++++------ 1 file changed, 64 insertions(+), 28 deletions(-) diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index c62e7c3de9..471ee091b9 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.Collections.Generic; using System.Globalization; using System.Linq; using System.Threading; @@ -149,6 +150,22 @@ public void AutoFitRegressionTest(string culture) // If the below assertion fails, increase the experiment time so the number of iterations is met Assert.True(culture == "en-US" || result.RunDetails.Count() >= 75, $"RunDetails.Count() = {result.RunDetails.Count()}, below 75"); } + catch (AggregateException ae) + { + // During CI unit testing, the host machines can run slower than normal, which + // can increase the run time of unit tests and throw OperationCanceledExceptions + // from multiple threads in the form of a single AggregateException. + foreach (var ex in ae.Flatten().InnerExceptions) + { + var ignoredExceptions = new List(); + if (ex is OperationCanceledException) + continue; + else + ignoredExceptions.Add(ex); + if (ignoredExceptions.Count > 0) + throw new AggregateException(ignoredExceptions); + } + } finally { Thread.CurrentThread.CurrentCulture = originalCulture; @@ -268,34 +285,53 @@ public void AutoFitRecommendationTest() var testDataView = reader.Load(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename))); // STEP 2: Run AutoML experiment - ExperimentResult experimentResult = mlContext.Auto() - .CreateRecommendationExperiment(5) - .Execute(trainDataView, testDataView, - new ColumnInformation() - { - LabelColumnName = labelColumnName, - UserIdColumnName = userColumnName, - ItemIdColumnName = itemColumnName - }); - - RunDetail bestRun = experimentResult.BestRun; - Assert.True(experimentResult.RunDetails.Count() > 1); - Assert.NotNull(bestRun.ValidationMetrics); - Assert.True(experimentResult.RunDetails.Max(i => i?.ValidationMetrics?.RSquared* i?.ValidationMetrics?.RSquared) > 0.5); - - var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema); - var expectedOutputNames = new string[] { labelColumnName, userColumnName, userColumnName, itemColumnName, itemColumnName, scoreColumnName }; - foreach (var col in outputSchema) - Assert.True(col.Name == expectedOutputNames[col.Index]); - - IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView); - // Retrieve label column's index from the test IDataView - testDataView.Schema.TryGetColumnIndex(labelColumnName, out int labelColumnId); - // Retrieve score column's index from the IDataView produced by the trained model - testDataViewWithBestScore.Schema.TryGetColumnIndex(scoreColumnName, out int scoreColumnId); - - var metrices = mlContext.Recommendation().Evaluate(testDataViewWithBestScore, labelColumnName: labelColumnName, scoreColumnName: scoreColumnName); - Assert.NotEqual(0, metrices.MeanSquaredError); + try + { + ExperimentResult experimentResult = mlContext.Auto() + .CreateRecommendationExperiment(5) + .Execute(trainDataView, testDataView, + new ColumnInformation() + { + LabelColumnName = labelColumnName, + UserIdColumnName = userColumnName, + ItemIdColumnName = itemColumnName + }); + + RunDetail bestRun = experimentResult.BestRun; + Assert.True(experimentResult.RunDetails.Count() > 1); + Assert.NotNull(bestRun.ValidationMetrics); + Assert.True(experimentResult.RunDetails.Max(i => i?.ValidationMetrics?.RSquared* i?.ValidationMetrics?.RSquared) > 0.5); + + var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema); + var expectedOutputNames = new string[] { labelColumnName, userColumnName, userColumnName, itemColumnName, itemColumnName, scoreColumnName }; + foreach (var col in outputSchema) + Assert.True(col.Name == expectedOutputNames[col.Index]); + + IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView); + // Retrieve label column's index from the test IDataView + testDataView.Schema.TryGetColumnIndex(labelColumnName, out int labelColumnId); + // Retrieve score column's index from the IDataView produced by the trained model + testDataViewWithBestScore.Schema.TryGetColumnIndex(scoreColumnName, out int scoreColumnId); + + var metrices = mlContext.Recommendation().Evaluate(testDataViewWithBestScore, labelColumnName: labelColumnName, scoreColumnName: scoreColumnName); + Assert.NotEqual(0, metrices.MeanSquaredError); + } + catch (AggregateException ae) + { + // During CI unit testing, the host machines can run slower than normal, which + // can increase the run time of unit tests and throw OperationCanceledExceptions + // from multiple threads in the form of a single AggregateException. + foreach (var ex in ae.Flatten().InnerExceptions) + { + var ignoredExceptions = new List(); + if (ex is OperationCanceledException) + continue; + else + ignoredExceptions.Add(ex); + if (ignoredExceptions.Count > 0) + throw new AggregateException(ignoredExceptions); + } + } } [Fact] From 0922aed1a0f3c3f01d771a58d6e1cf5b424ad466 Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Tue, 3 Nov 2020 02:38:13 -0800 Subject: [PATCH 22/26] Reviews --- src/Microsoft.ML.AutoML/Experiment/Experiment.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 3dcd428de2..f62a4dc429 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -78,6 +78,9 @@ private void MainContextCanceledEvent(object state) if ((_context.Model.GetEnvironment() as ICancelable).IsCanceled) { _logger.Warning("Main MLContext has been canceled. Ending experiment..."); + // Stop timer to prevent restarting and prevent continuous calls to + // MainContextCanceledEvent + _mainContextCanceledTimer.Change(Timeout.Infinite, Timeout.Infinite); _currentModelMLContext.CancelExecution(); } } @@ -110,7 +113,7 @@ public IList Execute() // Pseudo random number generator to result in deterministic runs with the provided main MLContext's seed and to // maintain variability between training iterations. - int ? mainContextSeed = ((ISeededEnvironment)_context.Model.GetEnvironment()).Seed; + int? mainContextSeed = ((ISeededEnvironment)_context.Model.GetEnvironment()).Seed; _newContextSeedGenerator = (mainContextSeed.HasValue) ? RandomUtils.Create(mainContextSeed.Value) : RandomUtils.Create(); do From ef4b34fb469bb31f255cf2412bbfa04bb1e6868f Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Tue, 3 Nov 2020 03:45:37 -0800 Subject: [PATCH 23/26] Final reviews --- .../Experiment/Experiment.cs | 2 +- .../Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index f62a4dc429..1e5232a1d6 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -114,7 +114,7 @@ public IList Execute() // Pseudo random number generator to result in deterministic runs with the provided main MLContext's seed and to // maintain variability between training iterations. int? mainContextSeed = ((ISeededEnvironment)_context.Model.GetEnvironment()).Seed; - _newContextSeedGenerator = (mainContextSeed.HasValue) ? RandomUtils.Create(mainContextSeed.Value) : RandomUtils.Create(); + _newContextSeedGenerator = (mainContextSeed.HasValue) ? RandomUtils.Create(mainContextSeed.Value) : null; do { diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 471ee091b9..578b6d51d5 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -393,7 +393,7 @@ public void AutoFitWithPresplittedData() [Fact] public void AutoFitMaxExperimentTimeTest() - { + { // A single binary classification experiment takes less than 5 seconds. // System.OperationCanceledException is thrown when ongoing experiment // is canceled and at least one model has been generated. @@ -403,16 +403,19 @@ public void AutoFitMaxExperimentTimeTest() var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions); var trainData = textLoader.Load(dataPath); var experiment = context.Auto() - .CreateBinaryClassificationExperiment(15) + .CreateBinaryClassificationExperiment(20) .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel }); // Ensure the (last) model that was training when maximum experiment time was reached has been stopped, - // and that its MLContext has been canceled. - Assert.True(experiment.RunDetails.Last().Exception.Message.Contains("Operation was canceled"), - "Training process was not successfully canceled after maximum experiment time was reached."); - - // Ensure that the best found model can still run after maximum experiment time was reached. - IDataView predictions = experiment.BestRun.Model.Transform(trainData); + // and that its MLContext has been canceled. Sometimes during CI unit testing, the host machines can run slower than normal, which + // can increase the run time of unit tests, and may not produce multiple runs. + if (experiment.RunDetails.Select(r => r.Exception == null).Count() > 1) + { + Assert.True(experiment.RunDetails.Last().Exception.Message.Contains("Operation was canceled"), + "Training process was not successfully canceled after maximum experiment time was reached."); + // Ensure that the best found model can still run after maximum experiment time was reached. + IDataView predictions = experiment.BestRun.Model.Transform(trainData); + } } private TextLoader.Options GetLoaderArgs(string labelColumnName, string userIdColumnName, string itemIdColumnName) From b4b49ce93296983c1550de968555afb6386a9ebf Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Tue, 3 Nov 2020 04:36:13 -0800 Subject: [PATCH 24/26] Added LightGBMFact to binary classification test --- test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 578b6d51d5..32480bdd30 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -391,12 +391,14 @@ public void AutoFitWithPresplittedData() } - [Fact] + [LightGBMFact] public void AutoFitMaxExperimentTimeTest() { // A single binary classification experiment takes less than 5 seconds. // System.OperationCanceledException is thrown when ongoing experiment // is canceled and at least one model has been generated. + // BinaryClassificationExperiment includes LightGBM, which is not 32-bit + // compatible. var context = new MLContext(1); var dataPath = DatasetUtil.GetUciAdultDataset(); var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel); From 6502fc80c8fe7882279dc92e6564121cd3032d6d Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Tue, 3 Nov 2020 05:42:26 -0800 Subject: [PATCH 25/26] Removed extra Operation Stopped exception try catch --- .../Experiment/Experiment.cs | 88 ++++++++----------- .../Microsoft.ML.AutoML.Tests/AutoFitTests.cs | 4 +- 2 files changed, 40 insertions(+), 52 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index 1e5232a1d6..ba685821c7 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -118,59 +118,47 @@ public IList Execute() do { - try + var iterationStopwatch = Stopwatch.StartNew(); + + // get next pipeline + var getPipelineStopwatch = Stopwatch.StartNew(); + + // A new MLContext is needed per model run. When max experiment time is reached, each used + // context is canceled to stop further model training. The cancellation of the main MLContext + // a user has instantiated is not desirable, thus additional MLContexts are used. + _currentModelMLContext = _newContextSeedGenerator == null ? new MLContext() : new MLContext(_newContextSeedGenerator.Next()); + var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task, + _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _logger, _trainerAllowList); + // break if no candidates returned, means no valid pipeline available + if (pipeline == null) { - var iterationStopwatch = Stopwatch.StartNew(); - - // get next pipeline - var getPipelineStopwatch = Stopwatch.StartNew(); - - // A new MLContext is needed per model run. When max experiment time is reached, each used - // context is canceled to stop further model training. The cancellation of the main MLContext - // a user has instantiated is not desirable, thus additional MLContexts are used. - _currentModelMLContext = _newContextSeedGenerator == null ? new MLContext() : new MLContext(_newContextSeedGenerator.Next()); - var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task, - _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _logger, _trainerAllowList); - // break if no candidates returned, means no valid pipeline available - if (pipeline == null) - { - break; - } - - // evaluate pipeline - _logger.Trace($"Evaluating pipeline {pipeline.ToString()}"); - (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail) - = _runner.Run(pipeline, _modelDirectory, _history.Count + 1); - - _history.Add(suggestedPipelineRunDetail); - WriteIterationLog(pipeline, suggestedPipelineRunDetail, iterationStopwatch); - - runDetail.RuntimeInSeconds = iterationStopwatch.Elapsed.TotalSeconds; - runDetail.PipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds; - - ReportProgress(runDetail); - iterationResults.Add(runDetail); - - // if model is perfect, break - if (_metricsAgent.IsModelPerfect(suggestedPipelineRunDetail.Score)) - { - break; - } - - // If after third run, all runs have failed so far, throw exception - if (_history.Count() == 3 && _history.All(r => !r.RunSucceeded)) - { - throw new InvalidOperationException($"Training failed with the exception: {_history.Last().Exception}"); - } + break; } - catch (OperationCanceledException e) + + // evaluate pipeline + _logger.Trace($"Evaluating pipeline {pipeline.ToString()}"); + (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail) + = _runner.Run(pipeline, _modelDirectory, _history.Count + 1); + + _history.Add(suggestedPipelineRunDetail); + WriteIterationLog(pipeline, suggestedPipelineRunDetail, iterationStopwatch); + + runDetail.RuntimeInSeconds = iterationStopwatch.Elapsed.TotalSeconds; + runDetail.PipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds; + + ReportProgress(runDetail); + iterationResults.Add(runDetail); + + // if model is perfect, break + if (_metricsAgent.IsModelPerfect(suggestedPipelineRunDetail.Score)) + { + break; + } + + // If after third run, all runs have failed so far, throw exception + if (_history.Count() == 3 && _history.All(r => !r.RunSucceeded)) { - // This exception is thrown when the IHost/MLContext of the trainer is canceled due to - // reaching maximum experiment time. Simply catch this exception and return finished - // iteration results. - _logger.Warning("OperationCanceledException has been caught after maximum experiment time" + - "was reached, and the running MLContext was stopped. Details: {0}", e.Message); - return iterationResults; + throw new InvalidOperationException($"Training failed with the exception: {_history.Last().Exception}"); } } while (_history.Count < _experimentSettings.MaxModels && !_experimentSettings.CancellationToken.IsCancellationRequested && diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index 32480bdd30..39c5bf1332 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -405,13 +405,13 @@ public void AutoFitMaxExperimentTimeTest() var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions); var trainData = textLoader.Load(dataPath); var experiment = context.Auto() - .CreateBinaryClassificationExperiment(20) + .CreateBinaryClassificationExperiment(15) .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel }); // Ensure the (last) model that was training when maximum experiment time was reached has been stopped, // and that its MLContext has been canceled. Sometimes during CI unit testing, the host machines can run slower than normal, which // can increase the run time of unit tests, and may not produce multiple runs. - if (experiment.RunDetails.Select(r => r.Exception == null).Count() > 1) + if (experiment.RunDetails.Select(r => r.Exception == null).Count() > 1 && experiment.RunDetails.Last().Exception != null) { Assert.True(experiment.RunDetails.Last().Exception.Message.Contains("Operation was canceled"), "Training process was not successfully canceled after maximum experiment time was reached."); From 28e2f2e62bbcc9ce4f484fdc6ebfd8da00a053ca Mon Sep 17 00:00:00 2001 From: Mustafa Bal <5262061+mstfbl@users.noreply.github.com> Date: Tue, 3 Nov 2020 06:03:14 -0800 Subject: [PATCH 26/26] Add back OperationCanceledException to Experiment.cs --- .../Experiment/Experiment.cs | 88 +++++++++++-------- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs index ba685821c7..1e5232a1d6 100644 --- a/src/Microsoft.ML.AutoML/Experiment/Experiment.cs +++ b/src/Microsoft.ML.AutoML/Experiment/Experiment.cs @@ -118,47 +118,59 @@ public IList Execute() do { - var iterationStopwatch = Stopwatch.StartNew(); - - // get next pipeline - var getPipelineStopwatch = Stopwatch.StartNew(); - - // A new MLContext is needed per model run. When max experiment time is reached, each used - // context is canceled to stop further model training. The cancellation of the main MLContext - // a user has instantiated is not desirable, thus additional MLContexts are used. - _currentModelMLContext = _newContextSeedGenerator == null ? new MLContext() : new MLContext(_newContextSeedGenerator.Next()); - var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task, - _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _logger, _trainerAllowList); - // break if no candidates returned, means no valid pipeline available - if (pipeline == null) + try { - break; + var iterationStopwatch = Stopwatch.StartNew(); + + // get next pipeline + var getPipelineStopwatch = Stopwatch.StartNew(); + + // A new MLContext is needed per model run. When max experiment time is reached, each used + // context is canceled to stop further model training. The cancellation of the main MLContext + // a user has instantiated is not desirable, thus additional MLContexts are used. + _currentModelMLContext = _newContextSeedGenerator == null ? new MLContext() : new MLContext(_newContextSeedGenerator.Next()); + var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task, + _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _logger, _trainerAllowList); + // break if no candidates returned, means no valid pipeline available + if (pipeline == null) + { + break; + } + + // evaluate pipeline + _logger.Trace($"Evaluating pipeline {pipeline.ToString()}"); + (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail) + = _runner.Run(pipeline, _modelDirectory, _history.Count + 1); + + _history.Add(suggestedPipelineRunDetail); + WriteIterationLog(pipeline, suggestedPipelineRunDetail, iterationStopwatch); + + runDetail.RuntimeInSeconds = iterationStopwatch.Elapsed.TotalSeconds; + runDetail.PipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds; + + ReportProgress(runDetail); + iterationResults.Add(runDetail); + + // if model is perfect, break + if (_metricsAgent.IsModelPerfect(suggestedPipelineRunDetail.Score)) + { + break; + } + + // If after third run, all runs have failed so far, throw exception + if (_history.Count() == 3 && _history.All(r => !r.RunSucceeded)) + { + throw new InvalidOperationException($"Training failed with the exception: {_history.Last().Exception}"); + } } - - // evaluate pipeline - _logger.Trace($"Evaluating pipeline {pipeline.ToString()}"); - (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail) - = _runner.Run(pipeline, _modelDirectory, _history.Count + 1); - - _history.Add(suggestedPipelineRunDetail); - WriteIterationLog(pipeline, suggestedPipelineRunDetail, iterationStopwatch); - - runDetail.RuntimeInSeconds = iterationStopwatch.Elapsed.TotalSeconds; - runDetail.PipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds; - - ReportProgress(runDetail); - iterationResults.Add(runDetail); - - // if model is perfect, break - if (_metricsAgent.IsModelPerfect(suggestedPipelineRunDetail.Score)) - { - break; - } - - // If after third run, all runs have failed so far, throw exception - if (_history.Count() == 3 && _history.All(r => !r.RunSucceeded)) + catch (OperationCanceledException e) { - throw new InvalidOperationException($"Training failed with the exception: {_history.Last().Exception}"); + // This exception is thrown when the IHost/MLContext of the trainer is canceled due to + // reaching maximum experiment time. Simply catch this exception and return finished + // iteration results. + _logger.Warning("OperationCanceledException has been caught after maximum experiment time" + + "was reached, and the running MLContext was stopped. Details: {0}", e.Message); + return iterationResults; } } while (_history.Count < _experimentSettings.MaxModels && !_experimentSettings.CancellationToken.IsCancellationRequested &&