diff --git a/src/Microsoft.ML.Auto/API/ExperimentSettings.cs b/src/Microsoft.ML.Auto/API/ExperimentSettings.cs index fedb625c4e..4fae30de04 100644 --- a/src/Microsoft.ML.Auto/API/ExperimentSettings.cs +++ b/src/Microsoft.ML.Auto/API/ExperimentSettings.cs @@ -20,6 +20,7 @@ public class ExperimentSettings /// public DirectoryInfo ModelDirectory { get; set; } = null; + /// /// This setting controls whether or not an AutoML experiment will make use of ML.NET-provided caching. /// If set to true, caching will be forced on for all pipelines. If set to false, caching will be forced off. /// If set to null (default value), AutoML will decide whether to enable caching for each model. diff --git a/src/Microsoft.ML.Auto/AutoMlUtils.cs b/src/Microsoft.ML.Auto/AutoMlUtils.cs index b4dcb61e8d..ffde30ebff 100644 --- a/src/Microsoft.ML.Auto/AutoMlUtils.cs +++ b/src/Microsoft.ML.Auto/AutoMlUtils.cs @@ -3,17 +3,18 @@ // See the LICENSE file in the project root for more information. using System; +using System.Threading; using Microsoft.Data.DataView; namespace Microsoft.ML.Auto { internal static class AutoMlUtils { - public static Random Random = new Random(); + public static readonly ThreadLocal random = new ThreadLocal(() => new Random()); public static void Assert(bool boolVal, string message = null) { - if(!boolVal) + if (!boolVal) { message = message ?? "Assertion failed"; throw new InvalidOperationException(message); diff --git a/src/Microsoft.ML.Auto/ColumnInference/ColumnTypeInference.cs b/src/Microsoft.ML.Auto/ColumnInference/ColumnTypeInference.cs index 2277d8f7bb..806ea1c52e 100644 --- a/src/Microsoft.ML.Auto/ColumnInference/ColumnTypeInference.cs +++ b/src/Microsoft.ML.Auto/ColumnInference/ColumnTypeInference.cs @@ -177,7 +177,7 @@ public void Apply(IntermediateColumn[] columns) if (!col.RawData.Skip(1) .All(x => { - Single value; + float value; return Conversions.TryParse(in x, out value); }) ) diff --git a/src/Microsoft.ML.Auto/ColumnInference/TextFileSample.cs b/src/Microsoft.ML.Auto/ColumnInference/TextFileSample.cs index 28b0aaf60e..f528d5a8b8 100644 --- a/src/Microsoft.ML.Auto/ColumnInference/TextFileSample.cs +++ b/src/Microsoft.ML.Auto/ColumnInference/TextFileSample.cs @@ -138,9 +138,8 @@ public static TextFileSample CreateFromFullStream(Stream stream) // determine the start of each remaining chunk long fileSizeRemaining = fileSize - firstChunk.Length - ((long)chunkSize) * chunkCount; - var rnd = AutoMlUtils.Random; var chunkStartIndices = Enumerable.Range(0, chunkCount) - .Select(x => rnd.NextDouble() * fileSizeRemaining) + .Select(x => AutoMlUtils.random.Value.NextDouble() * fileSizeRemaining) .OrderBy(x => x) .Select((spot, i) => (long)(spot + firstChunk.Length + i * chunkSize)) .ToArray(); diff --git a/src/Microsoft.ML.Auto/Sweepers/ISweeper.cs b/src/Microsoft.ML.Auto/Sweepers/ISweeper.cs index 07c44e7b60..457ebd2645 100644 --- a/src/Microsoft.ML.Auto/Sweepers/ISweeper.cs +++ b/src/Microsoft.ML.Auto/Sweepers/ISweeper.cs @@ -6,7 +6,6 @@ using System.Collections; using System.Collections.Generic; using System.Linq; -using Float = System.Single; namespace Microsoft.ML.Auto { @@ -236,10 +235,10 @@ IComparable IRunResult.MetricValue /// internal sealed class RunMetric { - private readonly Float _primaryMetric; - private readonly Float[] _metricDistribution; + private readonly float _primaryMetric; + private readonly float[] _metricDistribution; - public RunMetric(Float primaryMetric, IEnumerable metricDistribution = null) + public RunMetric(float primaryMetric, IEnumerable metricDistribution = null) { _primaryMetric = primaryMetric; if (metricDistribution != null) @@ -252,7 +251,7 @@ public RunMetric(Float primaryMetric, IEnumerable metricDistribution = nu /// By default, smart sweeping algorithms will maximize this metric. /// If you want to minimize, either negate this value or change the option in the arguments of the sweeper constructor. /// - public Float PrimaryMetric + public float PrimaryMetric { get { return _primaryMetric; } } @@ -261,11 +260,11 @@ public Float PrimaryMetric /// The (optional) distribution of the metric. /// This distribution can be a secondary measure of how good a run was, e.g per-fold AUC, per-fold accuracy, (sampled) per-instance log loss etc. /// - public Float[] GetMetricDistribution() + public float[] GetMetricDistribution() { if (_metricDistribution == null) return null; - var result = new Float[_metricDistribution.Length]; + var result = new float[_metricDistribution.Length]; Array.Copy(_metricDistribution, result, _metricDistribution.Length); return result; } diff --git a/src/Microsoft.ML.Auto/Sweepers/Parameters.cs b/src/Microsoft.ML.Auto/Sweepers/Parameters.cs index 0a29d35cd5..9c9ffabfd3 100644 --- a/src/Microsoft.ML.Auto/Sweepers/Parameters.cs +++ b/src/Microsoft.ML.Auto/Sweepers/Parameters.cs @@ -4,7 +4,6 @@ using System; using System.Collections.Generic; -using Float = System.Single; namespace Microsoft.ML.Auto { @@ -29,10 +28,10 @@ internal abstract class NumericParamArguments : BaseParamArguments internal class FloatParamArguments : NumericParamArguments { //[Argument(ArgumentType.Required, HelpText = "Minimum value")] - public Float Min; + public float Min; //[Argument(ArgumentType.Required, HelpText = "Maximum value")] - public Float Max; + public float Max; } internal class LongParamArguments : NumericParamArguments @@ -95,11 +94,11 @@ public override int GetHashCode() } } - internal sealed class FloatParameterValue : IParameterValue + internal sealed class FloatParameterValue : IParameterValue { private readonly string _name; private readonly string _valueText; - private readonly Float _value; + private readonly float _value; public string Name { @@ -111,14 +110,14 @@ public string ValueText get { return _valueText; } } - public Float Value + public float Value { get { return _value; } } - public FloatParameterValue(string name, Float value) + public FloatParameterValue(string name, float value) { - AutoMlUtils.Assert(!Float.IsNaN(value)); + AutoMlUtils.Assert(!float.IsNaN(value)); _name = name; _value = value; _valueText = _value.ToString("R"); @@ -186,7 +185,7 @@ public override int GetHashCode() internal interface INumericValueGenerator : IValueGenerator { - Float NormalizeValue(IParameterValue value); + float NormalizeValue(IParameterValue value); bool InRange(IParameterValue value); } @@ -294,7 +293,7 @@ public int Count } } - public Float NormalizeValue(IParameterValue value) + public float NormalizeValue(IParameterValue value) { var valueTyped = value as LongParameterValue; AutoMlUtils.Assert(valueTyped != null, "LongValueGenerator could not normalized parameter because it is not of the correct type"); @@ -302,11 +301,11 @@ public Float NormalizeValue(IParameterValue value) if (_args.LogBase) { - Float logBase = (Float)(_args.StepSize ?? Math.Pow(1.0 * _args.Max / _args.Min, 1.0 / (_args.NumSteps - 1))); - return (Float)((Math.Log(valueTyped.Value, logBase) - Math.Log(_args.Min, logBase)) / (Math.Log(_args.Max, logBase) - Math.Log(_args.Min, logBase))); + float logBase = (float)(_args.StepSize ?? Math.Pow(1.0 * _args.Max / _args.Min, 1.0 / (_args.NumSteps - 1))); + return (float)((Math.Log(valueTyped.Value, logBase) - Math.Log(_args.Min, logBase)) / (Math.Log(_args.Max, logBase) - Math.Log(_args.Min, logBase))); } else - return (Float)(valueTyped.Value - _args.Min) / (_args.Max - _args.Min); + return (float)(valueTyped.Value - _args.Min) / (_args.Max - _args.Min); } public bool InRange(IParameterValue value) @@ -339,7 +338,7 @@ public FloatValueGenerator(FloatParamArguments args) // REVIEW: Is Float accurate enough? public IParameterValue CreateFromNormalized(Double normalizedValue) { - Float val; + float val; if (_args.LogBase) { // REVIEW: review the math below, it only works for positive Min and Max @@ -348,10 +347,10 @@ public IParameterValue CreateFromNormalized(Double normalizedValue) : _args.StepSize.Value; var logMax = Math.Log(_args.Max, logBase); var logMin = Math.Log(_args.Min, logBase); - val = (Float)(_args.Min * Math.Pow(logBase, normalizedValue * (logMax - logMin))); + val = (float)(_args.Min * Math.Pow(logBase, normalizedValue * (logMax - logMin))); } else - val = (Float)(_args.Min + normalizedValue * (_args.Max - _args.Min)); + val = (float)(_args.Min + normalizedValue * (_args.Max - _args.Min)); return new FloatParameterValue(_args.Name, val); } @@ -367,11 +366,11 @@ private void EnsureParameterValues() // REVIEW: review the math below, it only works for positive Min and Max var logBase = _args.StepSize ?? Math.Pow(1.0 * _args.Max / _args.Min, 1.0 / (_args.NumSteps - 1)); - Float prevValue = Float.NegativeInfinity; + float prevValue = float.NegativeInfinity; var maxPlusEpsilon = _args.Max * Math.Sqrt(logBase); for (Double value = _args.Min; value <= maxPlusEpsilon; value *= logBase) { - var floatValue = (Float)value; + var floatValue = (float)value; if (floatValue > prevValue) result.Add(new FloatParameterValue(_args.Name, floatValue)); prevValue = floatValue; @@ -380,11 +379,11 @@ private void EnsureParameterValues() else { var stepSize = _args.StepSize ?? (Double)(_args.Max - _args.Min) / (_args.NumSteps - 1); - Float prevValue = Float.NegativeInfinity; + float prevValue = float.NegativeInfinity; var maxPlusEpsilon = _args.Max + stepSize / 2; for (Double value = _args.Min; value <= maxPlusEpsilon; value += stepSize) { - var floatValue = (Float)value; + var floatValue = (float)value; if (floatValue > prevValue) result.Add(new FloatParameterValue(_args.Name, floatValue)); prevValue = floatValue; @@ -412,7 +411,7 @@ public int Count } } - public Float NormalizeValue(IParameterValue value) + public float NormalizeValue(IParameterValue value) { var valueTyped = value as FloatParameterValue; AutoMlUtils.Assert(valueTyped != null, "FloatValueGenerator could not normalized parameter because it is not of the correct type"); @@ -420,8 +419,8 @@ public Float NormalizeValue(IParameterValue value) if (_args.LogBase) { - Float logBase = (Float)(_args.StepSize ?? Math.Pow(1.0 * _args.Max / _args.Min, 1.0 / (_args.NumSteps - 1))); - return (Float)((Math.Log(valueTyped.Value, logBase) - Math.Log(_args.Min, logBase)) / (Math.Log(_args.Max, logBase) - Math.Log(_args.Min, logBase))); + float logBase = (float)(_args.StepSize ?? Math.Pow(1.0 * _args.Max / _args.Min, 1.0 / (_args.NumSteps - 1))); + return (float)((Math.Log(valueTyped.Value, logBase) - Math.Log(_args.Min, logBase)) / (Math.Log(_args.Max, logBase) - Math.Log(_args.Min, logBase))); } else return (valueTyped.Value - _args.Min) / (_args.Max - _args.Min); diff --git a/src/Microsoft.ML.Auto/Sweepers/Random.cs b/src/Microsoft.ML.Auto/Sweepers/Random.cs index 24e097032e..36edcb8dca 100644 --- a/src/Microsoft.ML.Auto/Sweepers/Random.cs +++ b/src/Microsoft.ML.Auto/Sweepers/Random.cs @@ -23,7 +23,7 @@ public UniformRandomSweeper(ArgumentsBase args, IValueGenerator[] sweepParameter protected override ParameterSet CreateParamSet() { - return new ParameterSet(SweepParameters.Select(sweepParameter => sweepParameter.CreateFromNormalized(AutoMlUtils.Random.NextDouble()))); + return new ParameterSet(SweepParameters.Select(sweepParameter => sweepParameter.CreateFromNormalized(AutoMlUtils.random.Value.NextDouble()))); } } } diff --git a/src/Microsoft.ML.Auto/Sweepers/SweeperProbabilityUtils.cs b/src/Microsoft.ML.Auto/Sweepers/SweeperProbabilityUtils.cs index a45ae8473b..646a7df869 100644 --- a/src/Microsoft.ML.Auto/Sweepers/SweeperProbabilityUtils.cs +++ b/src/Microsoft.ML.Auto/Sweepers/SweeperProbabilityUtils.cs @@ -4,7 +4,6 @@ using System; using System.Collections.Generic; -using Float = System.Single; namespace Microsoft.ML.Auto { @@ -35,8 +34,8 @@ public double[] NormalRVs(int numRVs, double mu, double sigma) for (int i = 0; i < numRVs; i++) { - u1 = AutoMlUtils.Random.NextDouble(); - u2 = AutoMlUtils.Random.NextDouble(); + u1 = AutoMlUtils.random.Value.NextDouble(); + u2 = AutoMlUtils.random.Value.NextDouble(); rvs.Add(mu + sigma * Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2)); } @@ -61,11 +60,11 @@ private int BinarySearch(double[] a, double u, int low, int high) return a[mid] >= u ? BinarySearch(a, u, low, mid) : BinarySearch(a, u, mid, high); } - public static Float[] ParameterSetAsFloatArray(IValueGenerator[] sweepParams, ParameterSet ps, bool expandCategoricals = true) + public static float[] ParameterSetAsFloatArray(IValueGenerator[] sweepParams, ParameterSet ps, bool expandCategoricals = true) { AutoMlUtils.Assert(ps.Count == sweepParams.Length); - var result = new List(); + var result = new List(); for (int i = 0; i < sweepParams.Length; i++) { @@ -115,7 +114,7 @@ public static Float[] ParameterSetAsFloatArray(IValueGenerator[] sweepParams, Pa return result.ToArray(); } - public static ParameterSet FloatArrayAsParameterSet(IValueGenerator[] sweepParams, Float[] array, bool expandedCategoricals = true) + public static ParameterSet FloatArrayAsParameterSet(IValueGenerator[] sweepParams, float[] array, bool expandedCategoricals = true) { AutoMlUtils.Assert(array.Length == sweepParams.Length); diff --git a/src/Test/GetNextPipelineTests.cs b/src/Test/GetNextPipelineTests.cs index 3f3a531557..05af19dc75 100644 --- a/src/Test/GetNextPipelineTests.cs +++ b/src/Test/GetNextPipelineTests.cs @@ -57,10 +57,10 @@ public void GetNextPipelineMock() break; } - var result = new PipelineScore(pipeline, AutoMlUtils.Random.NextDouble(), true); + var result = new PipelineScore(pipeline, AutoMlUtils.random.Value.NextDouble(), true); history.Add(result); } - + Assert.AreEqual(maxIterations, history.Count); // Get all 'Stage 1' and 'Stage 2' runs from Pipeline Suggester diff --git a/src/Test/TextFileSampleTests.cs b/src/Test/TextFileSampleTests.cs index a787d0e065..b9cf90f39a 100644 --- a/src/Test/TextFileSampleTests.cs +++ b/src/Test/TextFileSampleTests.cs @@ -22,7 +22,7 @@ public void CanParseLargeRandomStream() for (var i = 0; i < numRows; i++) { var row = new byte[rowSize]; - AutoMlUtils.Random.NextBytes(row); + AutoMlUtils.random.Value.NextBytes(row); // ensure byte array has no 0s, so text file sampler doesn't // think file is encoded with UTF-16 or UTF-32 without a BOM