[AutoML] Rev AutoML public API; add required native references to AutoML projects #3364

Merged
merged 9 commits on Apr 16, 2019
32 changes: 5 additions & 27 deletions src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs
@@ -132,7 +132,7 @@ public enum BinaryClassificationTrainer
/// <summary>
/// AutoML experiment on binary classification datasets.
/// </summary>
public sealed class BinaryClassificationExperiment : ExperimentBase<BinaryClassificationMetrics>
public sealed class BinaryClassificationExperiment : ExperimentBase<BinaryClassificationMetrics, BinaryExperimentSettings>
{
internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSettings settings)
: base(context,
@@ -143,37 +143,15 @@ internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSettings settings)
TrainerExtensionUtil.GetTrainerNames(settings.Trainers))
{
}
}

/// <summary>
/// Extension methods that operate over binary experiment run results.
/// </summary>
public static class BinaryExperimentResultExtensions
{
/// <summary>
/// Select the best run from an enumeration of experiment runs.
/// </summary>
/// <param name="results">Enumeration of AutoML experiment run results.</param>
/// <param name="metric">Metric to consider when selecting the best run.</param>
/// <returns>The best experiment run.</returns>
public static RunDetail<BinaryClassificationMetrics> Best(this IEnumerable<RunDetail<BinaryClassificationMetrics>> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy)
private protected override RunDetail<BinaryClassificationMetrics> GetBestRun(IEnumerable<RunDetail<BinaryClassificationMetrics>> results)
{
var metricsAgent = new BinaryMetricsAgent(null, metric);
var isMetricMaximizing = new OptimizingMetricInfo(metric).IsMaximizing;
return BestResultUtil.GetBestRun(results, metricsAgent, isMetricMaximizing);
return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);
}

/// <summary>
/// Select the best run from an enumeration of experiment cross validation runs.
/// </summary>
/// <param name="results">Enumeration of AutoML experiment cross validation run results.</param>
/// <param name="metric">Metric to consider when selecting the best run.</param>
/// <returns>The best experiment run.</returns>
public static CrossValidationRunDetail<BinaryClassificationMetrics> Best(this IEnumerable<CrossValidationRunDetail<BinaryClassificationMetrics>> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy)
private protected override CrossValidationRunDetail<BinaryClassificationMetrics> GetBestCrossValRun(IEnumerable<CrossValidationRunDetail<BinaryClassificationMetrics>> results)
{
var metricsAgent = new BinaryMetricsAgent(null, metric);
var isMetricMaximizing = new OptimizingMetricInfo(metric).IsMaximizing;
return BestResultUtil.GetBestRun(results, metricsAgent, isMetricMaximizing);
return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);
}
}
}
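For orientation, a minimal consumer-side sketch of the revised binary API: the removed `Best()` extension methods are replaced by a best run carried on the experiment result. The `mlContext.Auto().CreateBinaryClassificationExperiment(...)` factory, the settings property names, and the `BestRun`/`TrainerName`/`ValidationMetrics` members are assumptions drawn from the surrounding AutoML preview surface, not part of this diff.

```csharp
using System;
using Microsoft.ML;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;

public static class BinaryAutoMLSample
{
    public static void Run(IDataView trainData)
    {
        var mlContext = new MLContext();

        // Hypothetical settings object; property names are assumptions.
        var settings = new BinaryExperimentSettings
        {
            MaxExperimentTimeInSeconds = 60,
            OptimizingMetric = BinaryClassificationMetric.Accuracy
        };

        // Assumes the Auto() catalog exposes a factory for this experiment type.
        var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(settings);

        // Execute now returns an ExperimentResult<TMetrics> rather than an
        // IEnumerable<RunDetail<TMetrics>>, so the best run is already selected.
        ExperimentResult<BinaryClassificationMetrics> result =
            experiment.Execute(trainData, labelColumnName: "Label");

        RunDetail<BinaryClassificationMetrics> best = result.BestRun;
        Console.WriteLine($"Best trainer: {best.TrainerName}, accuracy: {best.ValidationMetrics.Accuracy:F4}");
    }
}
```

One design consequence visible in the diff above: callers no longer re-create a `BinaryMetricsAgent` and `OptimizingMetricInfo` just to rank runs; the experiment reuses the agents it already holds when picking the best run.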
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Auto/API/ColumnInference.cs
@@ -28,7 +28,7 @@ public sealed class ColumnInferenceResults
/// <remarks>
/// <para>Contains the inferred purposes of each column. See <see cref="Auto.ColumnInformation"/> for more details.</para>
/// <para>This can be fed to the AutoML API when running an experiment.
/// See <typeref cref="ExperimentBase{TMetrics}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
/// See <typeref cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
/// for example.</para>
/// </remarks>
public ColumnInformation ColumnInformation { get; internal set; } = new ColumnInformation();
@@ -42,7 +42,7 @@ public sealed class ColumnInferenceResults
/// it enumerates the dataset columns that AutoML should treat as categorical,
/// the columns AutoML should ignore, which column is the label, etc.</para>
/// <para><see cref="ColumnInformation"/> can be fed to the AutoML API when running an experiment.
/// See <typeref cref="ExperimentBase{TMetrics}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
/// See <typeref cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
/// for example.</para>
/// </remarks>
public sealed class ColumnInformation
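The remarks above describe feeding inferred column information into an experiment. A hedged sketch of that flow; `InferColumns`, `TextLoaderOptions`, and the `Auto()` factory overload are assumed from the AutoML preview surface rather than taken from this diff.

```csharp
using Microsoft.ML;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;

public static class ColumnInferenceSample
{
    public static ExperimentResult<BinaryClassificationMetrics> Run(MLContext mlContext, string dataPath)
    {
        // Infer each column's type and purpose from the raw file (API name assumed).
        ColumnInferenceResults inference = mlContext.Auto().InferColumns(dataPath, labelColumnName: "Label");

        // Load the data with the inferred text loader configuration.
        var loader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
        IDataView trainData = loader.Load(dataPath);

        // Feed the inferred ColumnInformation straight into an experiment,
        // as the remarks on ColumnInferenceResults suggest.
        var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(maxExperimentTimeInSeconds: 60);
        return experiment.Execute(trainData, inference.ColumnInformation);
    }
}
```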
99 changes: 56 additions & 43 deletions src/Microsoft.ML.Auto/API/ExperimentBase.cs
@@ -12,27 +12,32 @@ namespace Microsoft.ML.Auto
/// (like <see cref="BinaryClassificationExperiment"/>) inherit from this class.
/// </summary>
/// <typeparam name="TMetrics">Metrics type used by task-specific AutoML experiments.</typeparam>
public abstract class ExperimentBase<TMetrics> where TMetrics : class
/// <typeparam name="TExperimentSettings">Experiment settings type.</typeparam>
public abstract class ExperimentBase<TMetrics, TExperimentSettings>
where TMetrics : class
where TExperimentSettings : ExperimentSettings
{
private protected readonly MLContext Context;
private protected readonly IMetricsAgent<TMetrics> MetricsAgent;
private protected readonly OptimizingMetricInfo OptimizingMetricInfo;
private protected readonly TExperimentSettings Settings;

private readonly IMetricsAgent<TMetrics> _metricsAgent;
private readonly OptimizingMetricInfo _optimizingMetricInfo;
private readonly ExperimentSettings _settings;
private readonly AutoMLLogger _logger;
private readonly TaskKind _task;
private readonly IEnumerable<TrainerName> _trainerWhitelist;

internal ExperimentBase(MLContext context,
IMetricsAgent<TMetrics> metricsAgent,
OptimizingMetricInfo optimizingMetricInfo,
ExperimentSettings settings,
TExperimentSettings settings,
TaskKind task,
IEnumerable<TrainerName> trainerWhitelist)
{
Context = context;
_metricsAgent = metricsAgent;
_optimizingMetricInfo = optimizingMetricInfo;
_settings = settings;
MetricsAgent = metricsAgent;
OptimizingMetricInfo = optimizingMetricInfo;
Settings = settings;
_logger = new AutoMLLogger(context);
_task = task;
_trainerWhitelist = trainerWhitelist;
}
@@ -53,12 +58,11 @@ internal ExperimentBase(MLContext context,
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, string labelColumnName = DefaultColumnNames.Label,
public ExperimentResult<TMetrics> Execute(IDataView trainData, string labelColumnName = DefaultColumnNames.Label,
string samplingKeyColumn = null, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
{
var columnInformation = new ColumnInformation()
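Because Execute reports each candidate model through IProgress&lt;T&gt;.Report, a caller can observe runs while the experiment is still working. A small sketch continuing the hypothetical binary example above; the `TrainerName` and `ValidationMetrics` members on `RunDetail` are assumptions.

```csharp
using System;
using Microsoft.ML;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;

public static class ProgressSample
{
    public static ExperimentResult<BinaryClassificationMetrics> RunWithProgress(
        BinaryClassificationExperiment experiment, IDataView trainData)
    {
        // Log each candidate model as it finishes; member names are assumed.
        var progress = new Progress<RunDetail<BinaryClassificationMetrics>>(run =>
            Console.WriteLine($"{run.TrainerName}: {run.ValidationMetrics?.Accuracy:F4}"));

        return experiment.Execute(trainData, labelColumnName: "Label", progressHandler: progress);
    }
}
```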
@@ -83,12 +87,11 @@ public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, string labelColumnName = DefaultColumnNames.Label,
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, ColumnInformation columnInformation,
public ExperimentResult<TMetrics> Execute(IDataView trainData, ColumnInformation columnInformation,
IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
{
// Cross val threshold for # of dataset rows --
@@ -126,12 +129,11 @@ public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, ColumnInformation columnInformation,
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, IDataView validationData, string labelColumnName = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
public ExperimentResult<TMetrics> Execute(IDataView trainData, IDataView validationData, string labelColumnName = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
{
var columnInformation = new ColumnInformation() { LabelColumnName = labelColumnName };
return Execute(trainData, validationData, columnInformation, preFeaturizer, progressHandler);
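When the caller already holds an explicit validation set, the overloads in this region evaluate each candidate against it instead of splitting or cross-validating the training data. A hedged sketch reusing the hypothetical experiment from earlier:

```csharp
using Microsoft.ML;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;

public static class TrainValidateSample
{
    public static RunDetail<BinaryClassificationMetrics> Run(
        BinaryClassificationExperiment experiment, IDataView trainData, IDataView validationData)
    {
        // Candidates are trained on trainData and scored on validationData;
        // the returned result carries the best run directly (BestRun is assumed).
        ExperimentResult<BinaryClassificationMetrics> result =
            experiment.Execute(trainData, validationData, labelColumnName: "Label");
        return result.BestRun;
    }
}
```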
@@ -152,12 +154,11 @@ public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, IDataView validationData,
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
public ExperimentResult<TMetrics> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<TMetrics>> progressHandler = null)
{
if (validationData == null)
{
@@ -183,12 +184,11 @@ public IEnumerable<RunDetail<TMetrics>> Execute(IDataView trainData, IDataView validationData,
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The cross validation experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<CrossValidationRunDetail<TMetrics>> Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizer = null, IProgress<CrossValidationRunDetail<TMetrics>> progressHandler = null)
public CrossValidationExperimentResult<TMetrics> Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizer = null, IProgress<CrossValidationRunDetail<TMetrics>> progressHandler = null)
{
UserInputValidationUtil.ValidateNumberOfCVFoldsArg(numberOfCVFolds);
var splitResult = SplitUtil.CrossValSplit(Context, trainData, numberOfCVFolds, columnInformation?.SamplingKeyColumnName);
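The cross-validation overloads now return a `CrossValidationExperimentResult<TMetrics>` rather than a raw enumeration of fold runs. A hedged consumer sketch; `BestRun`, `Results`, and the per-fold `ValidationMetrics` members are assumptions.

```csharp
using System;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;

public static class CrossValidationSample
{
    public static void Run(BinaryClassificationExperiment experiment, IDataView trainData)
    {
        // Five-fold cross validation over the training data.
        CrossValidationExperimentResult<BinaryClassificationMetrics> cvResult =
            experiment.Execute(trainData, numberOfCVFolds: 5, labelColumnName: "Label");

        CrossValidationRunDetail<BinaryClassificationMetrics> best = cvResult.BestRun;

        // Average the optimizing metric across the folds of the best run (member names assumed).
        double meanAccuracy = best.Results.Average(fold => fold.ValidationMetrics.Accuracy);
        Console.WriteLine($"{best.TrainerName}: mean accuracy {meanAccuracy:F4}");
    }
}
```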
@@ -211,12 +211,11 @@ public IEnumerable<CrossValidationRunDetail<TMetrics>> Execute(IDataView trainData,
/// <see cref="IProgress{T}.Report(T)"/> after each model it produces during the
/// course of the experiment.
/// </param>
/// <returns>An enumeration of all the runs in an experiment. See <see cref="RunDetail{TMetrics}"/>
/// for more information on the contents of a run.</returns>
/// <returns>The cross validation experiment result.</returns>
/// <remarks>
/// Depending on the size of your data, the AutoML experiment could take a long time to execute.
/// </remarks>
public IEnumerable<CrossValidationRunDetail<TMetrics>> Execute(IDataView trainData,
public CrossValidationExperimentResult<TMetrics> Execute(IDataView trainData,
uint numberOfCVFolds, string labelColumnName = DefaultColumnNames.Label,
string samplingKeyColumn = null, IEstimator<ITransformer> preFeaturizer = null,
Progress<CrossValidationRunDetail<TMetrics>> progressHandler = null)
@@ -229,7 +228,11 @@ public IEnumerable<CrossValidationRunDetail<TMetrics>> Execute(IDataView trainData,
return Execute(trainData, numberOfCVFolds, columnInformation, preFeaturizer, progressHandler);
}

private IEnumerable<RunDetail<TMetrics>> ExecuteTrainValidate(IDataView trainData,
private protected abstract CrossValidationRunDetail<TMetrics> GetBestCrossValRun(IEnumerable<CrossValidationRunDetail<TMetrics>> results);

private protected abstract RunDetail<TMetrics> GetBestRun(IEnumerable<RunDetail<TMetrics>> results);

private ExperimentResult<TMetrics> ExecuteTrainValidate(IDataView trainData,
ColumnInformation columnInfo,
IDataView validationData,
IEstimator<ITransformer> preFeaturizer,
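With `GetBestRun` and `GetBestCrossValRun` now abstract, each task-specific experiment supplies its own best-run selection using the protected `MetricsAgent` and `OptimizingMetricInfo`. As a sketch only, here is how a sibling experiment (for example regression, which is not part of this diff) would plausibly mirror the binary class above; the metrics-agent constructor arguments, `TaskKind.Regression`, and the settings properties are assumptions.

```csharp
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Microsoft.ML.Auto
{
    // Illustrative only: a sibling experiment type would live inside the
    // Microsoft.ML.Auto assembly and reuse the same internal helpers.
    public sealed class RegressionExperiment : ExperimentBase<RegressionMetrics, RegressionExperimentSettings>
    {
        internal RegressionExperiment(MLContext context, RegressionExperimentSettings settings)
            : base(context,
                  new RegressionMetricsAgent(context, settings.OptimizingMetric), // constructor args assumed
                  new OptimizingMetricInfo(settings.OptimizingMetric),
                  settings,
                  TaskKind.Regression,
                  TrainerExtensionUtil.GetTrainerNames(settings.Trainers))
        {
        }

        private protected override RunDetail<RegressionMetrics> GetBestRun(
            IEnumerable<RunDetail<RegressionMetrics>> results)
        {
            return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);
        }

        private protected override CrossValidationRunDetail<RegressionMetrics> GetBestCrossValRun(
            IEnumerable<CrossValidationRunDetail<RegressionMetrics>> results)
        {
            return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);
        }
    }
}
```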
@@ -247,13 +250,13 @@ private IEnumerable<RunDetail<TMetrics>> ExecuteTrainValidate(IDataView trainData,
validationData = preprocessorTransform.Transform(validationData);
}

var runner = new TrainValidateRunner<TMetrics>(Context, trainData, validationData, columnInfo.LabelColumnName, _metricsAgent,
preFeaturizer, preprocessorTransform, _settings.DebugLogger);
var runner = new TrainValidateRunner<TMetrics>(Context, trainData, validationData, columnInfo.LabelColumnName, MetricsAgent,
preFeaturizer, preprocessorTransform, _logger);
var columns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainData, columnInfo);
return Execute(columnInfo, columns, preFeaturizer, progressHandler, runner);
}

private IEnumerable<CrossValidationRunDetail<TMetrics>> ExecuteCrossVal(IDataView[] trainDatasets,
private CrossValidationExperimentResult<TMetrics> ExecuteCrossVal(IDataView[] trainDatasets,
ColumnInformation columnInfo,
IDataView[] validationDatasets,
IEstimator<ITransformer> preFeaturizer,
@@ -266,13 +269,21 @@ private IEnumerable<CrossValidationRunDetail<TMetrics>> ExecuteCrossVal(IDataView[] trainDatasets,
ITransformer[] preprocessorTransforms = null;
(trainDatasets, validationDatasets, preprocessorTransforms) = ApplyPreFeaturizerCrossVal(trainDatasets, validationDatasets, preFeaturizer);

var runner = new CrossValRunner<TMetrics>(Context, trainDatasets, validationDatasets, _metricsAgent, preFeaturizer,
preprocessorTransforms, columnInfo.LabelColumnName, _settings.DebugLogger);
var runner = new CrossValRunner<TMetrics>(Context, trainDatasets, validationDatasets, MetricsAgent, preFeaturizer,
preprocessorTransforms, columnInfo.LabelColumnName, _logger);
var columns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainDatasets[0], columnInfo);
return Execute(columnInfo, columns, preFeaturizer, progressHandler, runner);

// Execute experiment & get all pipelines run
var experiment = new Experiment<CrossValidationRunDetail<TMetrics>, TMetrics>(Context, _task, OptimizingMetricInfo, progressHandler,
Settings, MetricsAgent, _trainerWhitelist, columns, runner, _logger);
var runDetails = experiment.Execute();

var bestRun = GetBestCrossValRun(runDetails);
var experimentResult = new CrossValidationExperimentResult<TMetrics>(runDetails, bestRun);
return experimentResult;
}

private IEnumerable<RunDetail<TMetrics>> ExecuteCrossValSummary(IDataView[] trainDatasets,
private ExperimentResult<TMetrics> ExecuteCrossValSummary(IDataView[] trainDatasets,
ColumnInformation columnInfo,
IDataView[] validationDatasets,
IEstimator<ITransformer> preFeaturizer,
@@ -285,24 +296,26 @@ private IEnumerable<RunDetail<TMetrics>> ExecuteCrossValSummary(IDataView[] trainDatasets,
ITransformer[] preprocessorTransforms = null;
(trainDatasets, validationDatasets, preprocessorTransforms) = ApplyPreFeaturizerCrossVal(trainDatasets, validationDatasets, preFeaturizer);

var runner = new CrossValSummaryRunner<TMetrics>(Context, trainDatasets, validationDatasets, _metricsAgent, preFeaturizer,
preprocessorTransforms, columnInfo.LabelColumnName, _optimizingMetricInfo, _settings.DebugLogger);
var runner = new CrossValSummaryRunner<TMetrics>(Context, trainDatasets, validationDatasets, MetricsAgent, preFeaturizer,
preprocessorTransforms, columnInfo.LabelColumnName, OptimizingMetricInfo, _logger);
var columns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainDatasets[0], columnInfo);
return Execute(columnInfo, columns, preFeaturizer, progressHandler, runner);
}

private IEnumerable<TRunDetail> Execute<TRunDetail>(ColumnInformation columnInfo,
private ExperimentResult<TMetrics> Execute(ColumnInformation columnInfo,
DatasetColumnInfo[] columns,
IEstimator<ITransformer> preFeaturizer,
IProgress<TRunDetail> progressHandler,
IRunner<TRunDetail> runner)
where TRunDetail : RunDetail
IProgress<RunDetail<TMetrics>> progressHandler,
IRunner<RunDetail<TMetrics>> runner)
{
// Execute experiment & get all pipelines run
var experiment = new Experiment<TRunDetail, TMetrics>(Context, _task, _optimizingMetricInfo, progressHandler,
_settings, _metricsAgent, _trainerWhitelist, columns, runner);
var experiment = new Experiment<RunDetail<TMetrics>, TMetrics>(Context, _task, OptimizingMetricInfo, progressHandler,
Settings, MetricsAgent, _trainerWhitelist, columns, runner, _logger);
var runDetails = experiment.Execute();

return experiment.Execute();
var bestRun = GetBestRun(runDetails);
var experimentResult = new ExperimentResult<TMetrics>(runDetails, bestRun);
return experimentResult;
}

private static (IDataView[] trainDatasets, IDataView[] validDatasets, ITransformer[] preprocessorTransforms)
Expand Down
Loading