
[AutoML] Rev AutoML public API; add required native references to AutoML projects (dotnet#3364)
daholste authored and Dmitry-A committed Aug 22, 2019
1 parent 5d9e058 commit fb5f418
Showing 28 changed files with 322 additions and 237 deletions.
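The headline API change: Execute no longer returns a raw IEnumerable<RunDetail<TMetrics>>; it returns an ExperimentResult<TMetrics> that already carries the best run. Below is a minimal caller-side sketch of the before/after. The data file, column layout, and the BestRun/TrainerName members are assumptions inferred from the constructor calls later in this diff and the public API of the time, not part of this commit.

using System;
using Microsoft.ML;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;

public class Row
{
    [LoadColumn(0)] public bool Label { get; set; }
    [LoadColumn(1)] public float Feature { get; set; }
}

public static class Program
{
    public static void Main()
    {
        var context = new MLContext();

        // Hypothetical input file; any binary-labeled dataset works here.
        IDataView trainData = context.Data.LoadFromTextFile<Row>(
            "data.csv", separatorChar: ',', hasHeader: true);

        var experiment = context.Auto()
            .CreateBinaryClassificationExperiment(maxExperimentTimeInSeconds: 60);

        // Before this commit:
        //   var best = experiment.Execute(trainData).Best();
        // After it, the result object carries all runs plus the best one:
        ExperimentResult<BinaryClassificationMetrics> result = experiment.Execute(trainData);
        RunDetail<BinaryClassificationMetrics> bestRun = result.BestRun;
        Console.WriteLine($"Best trainer: {bestRun.TrainerName}");
    }
}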
32 changes: 5 additions & 27 deletions src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs
@@ -132,7 +132,7 @@ public enum BinaryClassificationTrainer
/// <summary>
/// AutoML experiment on binary classification datasets.
/// </summary>
public sealed class BinaryClassificationExperiment : ExperimentBase<BinaryClassificationMetrics>
public sealed class BinaryClassificationExperiment : ExperimentBase<BinaryClassificationMetrics, BinaryExperimentSettings>
{
internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSettings settings)
: base(context,
@@ -143,37 +143,15 @@ internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSetti
TrainerExtensionUtil.GetTrainerNames(settings.Trainers))
{
}
}

/// <summary>
/// Extension methods that operate over binary experiment run results.
/// </summary>
public static class BinaryExperimentResultExtensions
{
/// <summary>
/// Select the best run from an enumeration of experiment runs.
/// </summary>
/// <param name="results">Enumeration of AutoML experiment run results.</param>
/// <param name="metric">Metric to consider when selecting the best run.</param>
/// <returns>The best experiment run.</returns>
public static RunDetail<BinaryClassificationMetrics> Best(this IEnumerable<RunDetail<BinaryClassificationMetrics>> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy)
private protected override RunDetail<BinaryClassificationMetrics> GetBestRun(IEnumerable<RunDetail<BinaryClassificationMetrics>> results)
{
var metricsAgent = new BinaryMetricsAgent(null, metric);
var isMetricMaximizing = new OptimizingMetricInfo(metric).IsMaximizing;
return BestResultUtil.GetBestRun(results, metricsAgent, isMetricMaximizing);
return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);
}

/// <summary>
/// Select the best run from an enumeration of experiment cross validation runs.
/// </summary>
/// <param name="results">Enumeration of AutoML experiment cross validation run results.</param>
/// <param name="metric">Metric to consider when selecting the best run.</param>
/// <returns>The best experiment run.</returns>
public static CrossValidationRunDetail<BinaryClassificationMetrics> Best(this IEnumerable<CrossValidationRunDetail<BinaryClassificationMetrics>> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy)
private protected override CrossValidationRunDetail<BinaryClassificationMetrics> GetBestCrossValRun(IEnumerable<CrossValidationRunDetail<BinaryClassificationMetrics>> results)
{
var metricsAgent = new BinaryMetricsAgent(null, metric);
var isMetricMaximizing = new OptimizingMetricInfo(metric).IsMaximizing;
return BestResultUtil.GetBestRun(results, metricsAgent, isMetricMaximizing);
return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);
}
}
}
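Best-run selection moves from the deleted public Best() extension methods into private protected GetBestRun / GetBestCrossValRun overrides that reuse the experiment's own MetricsAgent and OptimizingMetricInfo. The practical consequence for callers: the optimizing metric is now chosen once, on the settings object, rather than per Best() call. A hedged sketch of that pattern (the OptimizingMetric property and the Auc member name are assumptions about the surrounding API, and may differ by version):

// Inside a Main(), with the usings from the previous sketch:
var context = new MLContext();

// Previously: experiment.Execute(trainData).Best(BinaryClassificationMetric.Auc);
// Now the metric lives on the settings, and the base class picks the
// best run via the task-specific GetBestRun override:
var settings = new BinaryExperimentSettings
{
    MaxExperimentTimeInSeconds = 60,
    OptimizingMetric = BinaryClassificationMetric.Auc,
};
var experiment = context.Auto().CreateBinaryClassificationExperiment(settings);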
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Auto/API/ColumnInference.cs
@@ -28,7 +28,7 @@ public sealed class ColumnInferenceResults
/// <remarks>
/// <para>Contains the inferred purposes of each column. See <see cref="Auto.ColumnInformation"/> for more details.</para>
/// <para>This can be fed to the AutoML API when running an experiment.
/// See <typeref cref="ExperimentBase{TMetrics}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
/// See <typeref cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
/// for example.</para>
/// </remarks>
public ColumnInformation ColumnInformation { get; internal set; } = new ColumnInformation();
@@ -42,7 +42,7 @@ public sealed class ColumnInferenceResults
/// it enumerates the dataset columns that AutoML should treat as categorical,
/// the columns AutoML should ignore, which column is the label, etc.</para>
/// <para><see cref="ColumnInformation"/> can be fed to the AutoML API when running an experiment.
/// See <typeref cref="ExperimentBase{TMetrics}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
/// See <typeref cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
/// for example.</para>
/// </remarks>
public sealed class ColumnInformation
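Both doc comments describe the same workflow: infer the columns, then hand the resulting ColumnInformation to Execute. A sketch under the same assumptions as the earlier ones (hypothetical data.csv; InferColumns and TextLoaderOptions as exposed by the Microsoft.ML.Auto API of the time):

// Inside a Main(), with the usings from the first sketch:
var context = new MLContext();

// Scan the file and guess each column's type and purpose.
ColumnInferenceResults inference = context.Auto()
    .InferColumns("data.csv", labelColumnName: "Label");

// Load the data with the inferred text-loader options...
IDataView data = context.Data
    .CreateTextLoader(inference.TextLoaderOptions)
    .Load("data.csv");

// ...and feed the inferred ColumnInformation to the Execute overload
// that the doc comments above reference.
var result = context.Auto()
    .CreateBinaryClassificationExperiment(maxExperimentTimeInSeconds: 60)
    .Execute(data, inference.ColumnInformation);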
99 changes: 56 additions & 43 deletions src/Microsoft.ML.Auto/API/ExperimentBase.cs
@@ -12,27 +12,32 @@ namespace Microsoft.ML.Auto
/// (like <see cref="BinaryClassificationExperiment"/>) inherit from this class.
/// </summary>
/// <typeparam name="TMetrics">Metrics type used by task-specific AutoML experiments.</typeparam>
public abstract class ExperimentBase<TMetrics> where TMetrics : class
/// <typeparam name="TExperimentSettings">Experiment settings type.</typeparam>
public abstract class ExperimentBase<TMetrics, TExperimentSettings>
where TMetrics : class
where TExperimentSettings : ExperimentSettings
{
private protected readonly MLContext Context;
private protected readonly IMetricsAgent<TMetrics> MetricsAgent;
private protected readonly OptimizingMetricInfo OptimizingMetricInfo;
private protected readonly TExperimentSettings Settings;

private readonly IMetricsAgent<TMetrics> _metricsAgent;
private readonly OptimizingMetricInfo _optimizingMetricInfo;
private readonly ExperimentSettings _settings;
private readonly AutoMLLogger _logger;
private readonly TaskKind _task;
private readonly IEnumerable<TrainerName> _trainerWhitelist;

internal ExperimentBase(MLContext context,
IMetricsAgent<TMetrics> metricsAgent,
OptimizingMetricInfo optimizingMetricInfo,
ExperimentSettings settings,
TExperimentSettings settings,
TaskKind task,
IEnumerable<TrainerName> trainerWhitelist)
{
Context = context;
_metricsAgent = metricsAgent;
_optimizingMetricInfo = optimizingMetricInfo;
_settings = settings;
MetricsAgent = metricsAgent;
OptimizingMetricInfo = optimizingMetricInfo;
Settings = settings;
_logger = new AutoMLLogger(context);
_task = task;
_trainerWhitelist = trainerWhitelist;
}
@@ -53,12 +58,11 @@ internal ExperimentBase(MLContext context,
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, string labelColumnName = DefaultColumnNames.Label,
public ExperimentResult<TMetrics> Execute(IDataView trainData, string labelColumnName = DefaultColumnNames.Label,
string samplingKeyColumn = null, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
{
var columnInformation = new ColumnInformation()
@@ -83,12 +87,11 @@ public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, string labe
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, ColumnInformation columnInformation,
public ExperimentResult<TMetrics> Execute(IDataView trainData, ColumnInformation columnInformation,
IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
{
// Cross val threshold for # of dataset rows --
@@ -126,12 +129,11 @@
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, IDataView validationData, string labelColumnName = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
public ExperimentResult<TMetrics> Execute(IDataView trainData, IDataView validationData, string labelColumnName = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
{
var columnInformation = new ColumnInformation() { LabelColumnName = labelColumnName };
return Execute(trainData, validationData, columnInformation, preFeaturizer, progressHandler);
@@ -152,12 +154,11 @@
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
public ExperimentResult<TMetrics> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
{
if (validationData == null)
{
@@ -183,12 +184,11 @@
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The cross validation experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<CrossValidationRunDetail<TMetrics>> Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizer = null, IProgress<CrossValidationRunDetail<TMetrics>> progressHandler = null)
public CrossValidationExperimentResult<TMetrics> Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizer = null, IProgress<CrossValidationRunDetail<TMetrics>> progressHandler = null)
{
UserInputValidationUtil.ValidateNumberOfCVFoldsArg(numberOfCVFolds);
var splitResult = SplitUtil.CrossValSplit(Context, trainData, numberOfCVFolds, columnInformation?.SamplingKeyColumnName);
@@ -211,12 +211,11 @@
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The cross validation experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<CrossValidationRunDetail<TMetrics>> Execute(IDataView trainData,
public CrossValidationExperimentResult<TMetrics> Execute(IDataView trainData,
uint numberOfCVFolds, string labelColumnName = DefaultColumnNames.Label,
string samplingKeyColumn = null, IEstimator<ITransformer> preFeaturizer = null,
Progress<CrossValidationRunDetail<TMetrics>> progressHandler = null)
@@ -229,7 +228,11 @@
return Execute(trainData, numberOfCVFolds, columnInformation, preFeaturizer, progressHandler);
}

private IEnumerable<RunDetail<TMetrics>> ExecuteTrainValidate(IDataView trainData,
private protected abstract CrossValidationRunDetail<TMetrics> GetBestCrossValRun(IEnumerable<CrossValidationRunDetail<TMetrics>> results);

private protected abstract RunDetail<TMetrics> GetBestRun(IEnumerable<RunDetail<TMetrics>> results);

private ExperimentResult<TMetrics> ExecuteTrainValidate(IDataView trainData,
ColumnInformation columnInfo,
IDataView validationData,
IEstimator<ITransformer> preFeaturizer,
@@ -247,13 +250,13 @@ private IEnumerable<RunDetail<TMetrics>> ExecuteTrainValidate(IDataView trainDat
validationData = preprocessorTransform.Transform(validationData);
}

var runner = new TrainValidateRunner<TMetrics>(Context, trainData, validationData, columnInfo.LabelColumnName, _metricsAgent,
preFeaturizer, preprocessorTransform, _settings.DebugLogger);
var runner = new TrainValidateRunner<TMetrics>(Context, trainData, validationData, columnInfo.LabelColumnName, MetricsAgent,
preFeaturizer, preprocessorTransform, _logger);
var columns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainData, columnInfo);
return Execute(columnInfo, columns, preFeaturizer, progressHandler, runner);
}

private IEnumerable<CrossValidationRunDetail<TMetrics>> ExecuteCrossVal(IDataView[] trainDatasets,
private CrossValidationExperimentResult<TMetrics> ExecuteCrossVal(IDataView[] trainDatasets,
ColumnInformation columnInfo,
IDataView[] validationDatasets,
IEstimator<ITransformer> preFeaturizer,
@@ -266,13 +269,21 @@ private IEnumerable<CrossValidationRunDetail<TMetrics>> ExecuteCrossVal(IDataVie
ITransformer[] preprocessorTransforms = null;
(trainDatasets, validationDatasets, preprocessorTransforms) = ApplyPreFeaturizerCrossVal(trainDatasets, validationDatasets, preFeaturizer);

var runner = new CrossValRunner<TMetrics>(Context, trainDatasets, validationDatasets, _metricsAgent, preFeaturizer,
preprocessorTransforms, columnInfo.LabelColumnName, _settings.DebugLogger);
var runner = new CrossValRunner<TMetrics>(Context, trainDatasets, validationDatasets, MetricsAgent, preFeaturizer,
preprocessorTransforms, columnInfo.LabelColumnName, _logger);
var columns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainDatasets[0], columnInfo);
return Execute(columnInfo, columns, preFeaturizer, progressHandler, runner);

// Execute experiment & get all pipelines run
var experiment = new Experiment<CrossValidationRunDetail<TMetrics>, TMetrics>(Context, _task, OptimizingMetricInfo, progressHandler,
Settings, MetricsAgent, _trainerWhitelist, columns, runner, _logger);
var runDetails = experiment.Execute();

var bestRun = GetBestCrossValRun(runDetails);
var experimentResult = new CrossValidationExperimentResult<TMetrics>(runDetails, bestRun);
return experimentResult;
}

private IEnumerable<RunDetail<TMetrics>> ExecuteCrossValSummary(IDataView[] trainDatasets,
private ExperimentResult<TMetrics> ExecuteCrossValSummary(IDataView[] trainDatasets,
ColumnInformation columnInfo,
IDataView[] validationDatasets,
IEstimator<ITransformer> preFeaturizer,
@@ -285,24 +296,26 @@ private IEnumerable<RunDetail<TMetrics>> ExecuteCrossValSummary(IDataView[] trai
ITransformer[] preprocessorTransforms = null;
(trainDatasets, validationDatasets, preprocessorTransforms) = ApplyPreFeaturizerCrossVal(trainDatasets, validationDatasets, preFeaturizer);

var runner = new CrossValSummaryRunner<TMetrics>(Context, trainDatasets, validationDatasets, _metricsAgent, preFeaturizer,
preprocessorTransforms, columnInfo.LabelColumnName, _optimizingMetricInfo, _settings.DebugLogger);
var runner = new CrossValSummaryRunner<TMetrics>(Context, trainDatasets, validationDatasets, MetricsAgent, preFeaturizer,
preprocessorTransforms, columnInfo.LabelColumnName, OptimizingMetricInfo, _logger);
var columns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainDatasets[0], columnInfo);
return Execute(columnInfo, columns, preFeaturizer, progressHandler, runner);
}

private IEnumerable<TRunDetail> Execute<TRunDetail>(ColumnInformation columnInfo,
private ExperimentResult<TMetrics> Execute(ColumnInformation columnInfo,
DatasetColumnInfo[] columns,
IEstimator<ITransformer> preFeaturizer,
IProgress<TRunDetail> progressHandler,
IRunner<TRunDetail> runner)
where TRunDetail : RunDetail
IProgress<RunDetail<TMetrics>> progressHandler,
IRunner<RunDetail<TMetrics>> runner)
{
// Execute experiment & get all pipelines run
var experiment = new Experiment<TRunDetail, TMetrics>(Context, _task, _optimizingMetricInfo, progressHandler,
_settings, _metricsAgent, _trainerWhitelist, columns, runner);
var experiment = new Experiment<RunDetail<TMetrics>, TMetrics>(Context, _task, OptimizingMetricInfo, progressHandler,
Settings, MetricsAgent, _trainerWhitelist, columns, runner, _logger);
var runDetails = experiment.Execute();

return experiment.Execute();
var bestRun = GetBestRun(runDetails);
var experimentResult = new ExperimentResult<TMetrics>(runDetails, bestRun);
return experimentResult;
}

private static (IDataView[] trainDatasets, IDataView[] validDatasets, ITransformer[] preprocessorTransforms)
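The cross-validation path mirrors the train/validate one: ExecuteCrossVal now wraps its run details in a CrossValidationExperimentResult<TMetrics>, with the best run chosen by the new abstract GetBestCrossValRun. A caller-side sketch (fold count and label column are placeholders; the BestRun property name is inferred from the constructor call above):

// Inside a Main(), with the usings and trainData from the first sketch:
CrossValidationExperimentResult<BinaryClassificationMetrics> cvResult =
    context.Auto()
        .CreateBinaryClassificationExperiment(maxExperimentTimeInSeconds: 60)
        .Execute(trainData, numberOfCVFolds: 5, labelColumnName: "Label");

// Best run across folds, per the task-specific GetBestCrossValRun:
CrossValidationRunDetail<BinaryClassificationMetrics> best = cvResult.BestRun;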