From 9ecd3f0b0eeb5f6d4f30fdb5dd0f60dd3c286770 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Mon, 11 Mar 2019 19:06:37 -0700 Subject: [PATCH 1/8] Adding functional tests for training scenarios --- test/Microsoft.ML.Functional.Tests/Common.cs | 28 + .../Microsoft.ML.Functional.Tests/Training.cs | 534 ++++++++++++++++++ .../Api/Estimators/Metacomponents.cs | 37 -- 3 files changed, 562 insertions(+), 37 deletions(-) create mode 100644 test/Microsoft.ML.Functional.Tests/Training.cs delete mode 100644 test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs diff --git a/test/Microsoft.ML.Functional.Tests/Common.cs b/test/Microsoft.ML.Functional.Tests/Common.cs index 5756893966..cd7f6757fc 100644 --- a/test/Microsoft.ML.Functional.Tests/Common.cs +++ b/test/Microsoft.ML.Functional.Tests/Common.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Data; using Microsoft.ML.Functional.Tests.Datasets; using Xunit; +using Xunit.Sdk; namespace Microsoft.ML.Functional.Tests { @@ -268,6 +269,33 @@ public static void AssertMetricsStatistics(RegressionMetricsStatistics metrics) AssertMetricStatistics(metrics.LossFunction); } + /// + /// Assert that two float arrays are not equal. + /// + /// An array of floats. + /// An array of floats. + public static void AssertNotEqual(float[] array1, float[] array2) + { + Assert.NotNull(array1); + Assert.NotNull(array2); + Assert.Equal(array1.Length, array2.Length); + + bool mismatch = false; + for (int i = 0; i < array1.Length; i++) + try + { + // Use Assert to test for equality rather than + // to roll our own float equality checker. + Assert.Equal(array1[i], array2[i]); + } + catch(EqualException) + { + mismatch = true; + break; + } + Assert.True(mismatch); + } + /// /// Verify that a float array has no NaNs or infinities. /// diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs new file mode 100644 index 0000000000..f7b10331f7 --- /dev/null +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -0,0 +1,534 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Linq; +using Microsoft.ML.Data; +using Microsoft.ML.Functional.Tests.Datasets; +using Microsoft.ML.RunTests; +using Microsoft.ML.TestFramework; +using Microsoft.ML.Trainers; +using Microsoft.ML.Trainers.FastTree; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.Functional.Tests +{ + public class Training : BaseTestClass + { + public Training(ITestOutputHelper output) : base(output) + { + } + + /// + /// Training: It is easy to compare trainer evaluations on the same dataset. + /// + [Fact] + public void CompareTrainerEvaluations() + { + var mlContext = new MLContext(seed: 1); + + // Get the dataset. + var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), + separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader, + allowQuoting: TestDatasets.Sentiment.allowQuoting); + var trainTestSplit = mlContext.BinaryClassification.TrainTestSplit(data); + var trainData = trainTestSplit.TrainSet; + var testData = trainTestSplit.TestSet; + + // Create a transformation pipeline. + var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") + .AppendCacheCheckpoint(mlContext); + + // Create a selection of learners. 
+            var sdcaTrainer = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
+                new SdcaBinaryTrainer.Options { NumberOfThreads = 1 });
+
+            var fastTreeTrainer = mlContext.BinaryClassification.Trainers.FastTree(
+                new FastTreeBinaryClassificationTrainer.Options { NumberOfThreads = 1 });
+
+            var ffmTrainer = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(
+                new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options { });
+
+            // Fit the data transformation pipeline.
+            var featurization = featurizationPipeline.Fit(trainData);
+            var featurizedTrain = featurization.Transform(trainData);
+            var featurizedTest = featurization.Transform(testData);
+
+            // Fit the trainers.
+            var sdca = sdcaTrainer.Fit(featurizedTrain);
+            var fastTree = fastTreeTrainer.Fit(featurizedTrain);
+            var ffm = ffmTrainer.Fit(featurizedTrain);
+
+            // Evaluate the trainers.
+            var sdcaPredictions = sdca.Transform(featurizedTest);
+            var sdcaMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(sdcaPredictions);
+            var fastTreePredictions = fastTree.Transform(featurizedTest);
+            var fastTreeMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(fastTreePredictions);
+            var ffmPredictions = ffm.Transform(featurizedTest);
+            var ffmMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(ffmPredictions);
+
+            // Validate the results.
+            Common.AssertMetrics(sdcaMetrics);
+            Common.AssertMetrics(fastTreeMetrics);
+            Common.AssertMetrics(ffmMetrics);
+        }
+
+        /// 
+        /// Training: Models can be trained starting from an existing model.
+        /// 
+        [Fact]
+        public void ContinueTrainingAveragePerceptron()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename),
+                separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader,
+                allowQuoting: TestDatasets.Sentiment.allowQuoting);
+
+            // Create a transformation pipeline.
+            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+                .AppendCacheCheckpoint(mlContext);
+
+            var trainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
+                new AveragedPerceptronTrainer.Options { NumberOfIterations = 1 });
+
+            // Fit the data transformation pipeline.
+            var featurization = featurizationPipeline.Fit(data);
+            var featurizedData = featurization.Transform(data);
+
+            // Fit the first trainer.
+            var firstModel = trainer.Fit(featurizedData);
+            var firstModelWeights = firstModel.Model.Weights;
+
+            // Fit the first trainer again.
+            var firstModelPrime = trainer.Fit(featurizedData);
+            var firstModelWeightsPrime = firstModel.Model.Weights;
+
+            // Fit the second trainer.
+            var secondModel = trainer.Fit(featurizedData, firstModel.Model);
+            var secondModelWeights = secondModel.Model.Weights;
+
+            // Validate that continued training occurred.
+            // Training from the same initial condition, same seed should create the same model.
+            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
+            // Continued training should create a different model.
+            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
+        }
+
+        /// 
+        /// Training: Models can be trained starting from an existing model.
+        /// 
+        [Fact]
+        public void ContinueTrainingFieldAwareFactorizationMachines()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename),
+                separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader,
+                allowQuoting: TestDatasets.Sentiment.allowQuoting);
+
+            // Create a transformation pipeline.
+            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+                .AppendCacheCheckpoint(mlContext);
+
+            var trainer = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(
+                new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options { NumberOfIterations = 100 });
+
+            // Fit the data transformation pipeline.
+            var featurization = featurizationPipeline.Fit(data);
+            var featurizedData = featurization.Transform(data);
+
+            // Fit the first trainer.
+            var firstModel = trainer.Fit(featurizedData);
+            var firstModelWeights = firstModel.Model.GetLinearWeights();
+
+            // Fit the first trainer again.
+            var firstModelPrime = trainer.Fit(featurizedData);
+            var firstModelWeightsPrime = firstModel.Model.GetLinearWeights();
+
+            // Fit the second trainer.
+            var secondModel = trainer.Fit(featurizedData, modelParameters: firstModel.Model);
+            var secondModelWeights = secondModel.Model.GetLinearWeights();
+
+            // Validate that continued training occurred.
+            // Training from the same initial condition, same seed should create the same model.
+            Assert.Equal(firstModelWeights, firstModelWeightsPrime);
+            // Continued training should create a different model.
+            Assert.NotEqual(firstModelWeights, secondModelWeights);
+        }
+
+        /// 
+        /// Training: Models can be trained starting from an existing model.
+        /// 
+        [Fact]
+        public void ContinueTrainingLinearSupportVectorMachine()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename),
+                separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader,
+                allowQuoting: TestDatasets.Sentiment.allowQuoting);
+
+            // Create a transformation pipeline.
+            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+                .AppendCacheCheckpoint(mlContext);
+
+            var trainer = mlContext.BinaryClassification.Trainers.LinearSupportVectorMachines(
+                new LinearSvmTrainer.Options { NumberOfIterations = 1 });
+
+            // Fit the data transformation pipeline.
+            var featurization = featurizationPipeline.Fit(data);
+            var featurizedData = featurization.Transform(data);
+
+            // Fit the first trainer.
+            var firstModel = trainer.Fit(featurizedData);
+            var firstModelWeights = firstModel.Model.Weights;
+
+            // Fit the first trainer again.
+            var firstModelPrime = trainer.Fit(featurizedData);
+            var firstModelWeightsPrime = firstModel.Model.Weights;
+
+            // Fit the second trainer.
+            var secondModel = trainer.Fit(featurizedData, firstModel.Model);
+            var secondModelWeights = secondModel.Model.Weights;
+
+            // Validate that continued training occurred.
+            // Training from the same initial condition, same seed should create the same model.
+            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
+            // Continued training should create a different model.
+            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
+        }
+
+        /// 
+        /// Training: Models can be trained starting from an existing model.
+        /// 
+        [Fact]
+        public void ContinueTrainingLogisticRegression()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename),
+                separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader,
+                allowQuoting: TestDatasets.Sentiment.allowQuoting);
+
+            // Create a transformation pipeline.
+            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+                .AppendCacheCheckpoint(mlContext);
+
+            var trainer = mlContext.BinaryClassification.Trainers.LogisticRegression(
+                new LogisticRegression.Options { NumberOfThreads = 1, NumberOfIterations = 10 });
+
+            // Fit the data transformation pipeline.
+            var featurization = featurizationPipeline.Fit(data);
+            var featurizedData = featurization.Transform(data);
+
+            // Fit the first trainer.
+            var firstModel = trainer.Fit(featurizedData);
+            var firstModelWeights = firstModel.Model.SubModel.Weights;
+
+            // Fit the first trainer again.
+            var firstModelPrime = trainer.Fit(featurizedData);
+            var firstModelWeightsPrime = firstModel.Model.SubModel.Weights;
+
+            // Fit the second trainer.
+            var secondModel = trainer.Fit(featurizedData, firstModel.Model.SubModel);
+            var secondModelWeights = secondModel.Model.SubModel.Weights;
+
+            // Validate that continued training occurred.
+            // Training from the same initial condition, same seed should create the same model.
+            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
+            // Continued training should create a different model.
+            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
+        }
+
+        /// 
+        /// Training: Models can be trained starting from an existing model.
+        /// 
+        [Fact]
+        public void ContinueTrainingLogisticRegressionMulticlass()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.iris.trainFilename),
+                hasHeader: TestDatasets.iris.fileHasHeader,
+                separatorChar: TestDatasets.iris.fileSeparator);
+
+            // Create a training pipeline.
+            var featurizationPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
+                .AppendCacheCheckpoint(mlContext);
+
+            var trainer = mlContext.MulticlassClassification.Trainers.LogisticRegression(
+                new MulticlassLogisticRegression.Options { NumberOfThreads = 1, NumberOfIterations = 10 });
+
+            // Fit the data transformation pipeline.
+            var featurization = featurizationPipeline.Fit(data);
+            var featurizedData = featurization.Transform(data);
+
+            // Fit the first trainer.
+            var firstModel = trainer.Fit(featurizedData);
+            VBuffer[] firstModelWeights = null;
+            firstModel.Model.GetWeights(ref firstModelWeights, out int firstModelNumClasses);
+
+            // Fit the first trainer again.
+            var firstModelPrime = trainer.Fit(featurizedData);
+            VBuffer[] firstModelWeightsPrime = null;
+            firstModel.Model.GetWeights(ref firstModelWeightsPrime, out int firstModelNumClassesPrime);
+
+            // Fit the second trainer.
+            var secondModel = trainer.Fit(featurizedData, firstModel.Model);
+            VBuffer[] secondModelWeights = null;
+            secondModel.Model.GetWeights(ref secondModelWeights, out int secondModelNumClasses);
+
+            // Validate that continued training occurred.
+            // Training from the same initial condition, same seed should create the same model.
+            Assert.Equal(firstModelNumClasses, firstModelNumClassesPrime);
+            for (int i = 0; i < firstModelNumClasses; i++)
+                Common.AssertEqual(firstModelWeights[i].DenseValues().ToArray(), firstModelWeightsPrime[i].DenseValues().ToArray());
+            // Continued training should create a different model.
+            Assert.Equal(firstModelNumClasses, secondModelNumClasses);
+            for (int i = 0; i < firstModelNumClasses; i++)
+                Common.AssertNotEqual(firstModelWeights[i].DenseValues().ToArray(), secondModelWeights[i].DenseValues().ToArray());
+        }
+
+        /// 
+        /// Training: Models can be trained starting from an existing model.
+        /// 
+        [Fact]
+        public void ContinueTrainingOnlineGradientDescent()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename),
+                separatorChar: TestDatasets.housing.fileSeparator, hasHeader: TestDatasets.housing.fileHasHeader);
+
+            // Create a transformation pipeline.
+            var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+                .Append(mlContext.Transforms.Normalize("Features"))
+                .AppendCacheCheckpoint(mlContext);
+
+            var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(
+                new OnlineGradientDescentTrainer.Options { NumberOfIterations = 10 });
+
+            // Fit the data transformation pipeline.
+            var featurization = featurizationPipeline.Fit(data);
+            var featurizedData = featurization.Transform(data);
+
+            // Fit the first trainer.
+            var firstModel = trainer.Fit(featurizedData);
+            var firstModelWeights = firstModel.Model.Weights;
+
+            // Fit the first trainer again.
+            var firstModelPrime = trainer.Fit(featurizedData);
+            var firstModelWeightsPrime = firstModel.Model.Weights;
+
+            // Fit the second trainer.
+            var secondModel = trainer.Fit(featurizedData, firstModel.Model);
+            var secondModelWeights = secondModel.Model.Weights;
+
+            // Validate that continued training occurred.
+            // Training from the same initial condition, same seed should create the same model.
+            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
+            // Continued training should create a different model.
+            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
+        }
+
+        /// 
+        /// Training: Models can be trained starting from an existing model.
+        /// 
+        [Fact]
+        public void ContinueTrainingLinearSymbolicStochasticGradientDescent()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename),
+                separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader,
+                allowQuoting: TestDatasets.Sentiment.allowQuoting);
+
+            // Create a transformation pipeline.
+            var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+                .AppendCacheCheckpoint(mlContext);
+
+            var trainer = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
+                new SymbolicStochasticGradientDescentClassificationTrainer.Options
+                {
+                    NumberOfThreads = 1,
+                    NumberOfIterations = 10
+                });
+
+            // Fit the data transformation pipeline.
+            var featurization = featurizationPipeline.Fit(data);
+            var featurizedData = featurization.Transform(data);
+
+            // Fit the first trainer.
+            var firstModel = trainer.Fit(featurizedData);
+            var firstModelWeights = firstModel.Model.SubModel.Weights;
+
+            // Fit the first trainer again.
+            var firstModelPrime = trainer.Fit(featurizedData);
+            var firstModelWeightsPrime = firstModel.Model.SubModel.Weights;
+
+            // Fit the second trainer.
+            var secondModel = trainer.Fit(featurizedData, firstModel.Model.SubModel);
+            var secondModelWeights = secondModel.Model.SubModel.Weights;
+
+            // Validate that continued training occurred.
+            // Training from the same initial condition, same seed should create the same model.
+            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
+            // Continued training should create a different model.
+            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
+        }
+
+        /// 
+        /// Training: Models can be trained starting from an existing model.
+        /// 
+        [Fact]
+        public void ContinueTrainingPoissonRegression()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename),
+                separatorChar: TestDatasets.housing.fileSeparator, hasHeader: TestDatasets.housing.fileHasHeader);
+
+            // Create a transformation pipeline.
+            var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+                .Append(mlContext.Transforms.Normalize("Features"))
+                .AppendCacheCheckpoint(mlContext);
+
+            var trainer = mlContext.Regression.Trainers.PoissonRegression(
+                new PoissonRegression.Options { NumberOfThreads = 1, NumberOfIterations = 100 });
+
+            // Fit the data transformation pipeline.
+            var featurization = featurizationPipeline.Fit(data);
+            var featurizedData = featurization.Transform(data);
+
+            // Fit the first trainer.
+            var firstModel = trainer.Fit(featurizedData);
+            var firstModelWeights = firstModel.Model.Weights;
+
+            // Fit the first trainer again.
+            var firstModelPrime = trainer.Fit(featurizedData);
+            var firstModelWeightsPrime = firstModel.Model.Weights;
+
+            // Fit the second trainer.
+            var secondModel = trainer.Fit(featurizedData, firstModel.Model);
+            var secondModelWeights = secondModel.Model.Weights;
+
+            // Validate that continued training occurred.
+            // Training from the same initial condition, same seed should create the same model.
+            Common.AssertEqual(firstModelWeights.ToArray(), firstModelWeightsPrime.ToArray());
+            // Continued training should create a different model.
+            Common.AssertNotEqual(firstModelWeights.ToArray(), secondModelWeights.ToArray());
+        }
+
+        /// 
+        /// Training: Meta-components function as expected. For OVA (one-versus-all), a user will be able to specify only
+        /// binary classifier trainers. If they specify a different model class, there should be a compile error.
+        /// 
+        [Fact]
+        public void MetacomponentsFunctionAsExpectedOva()
+        {
+            var mlContext = new MLContext(seed: 1);
+
+            var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.iris.trainFilename),
+                hasHeader: TestDatasets.iris.fileHasHeader,
+                separatorChar: TestDatasets.iris.fileSeparator);
+
+            // Create a model training an OVA trainer with a binary classifier.
+            var anomalyDetectionTrainer = mlContext.AnomalyDetection.Trainers.AnalyzeRandomizedPrincipalComponents();
+            var anomalyDetectionPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
+                .AppendCacheCheckpoint(mlContext)
+                .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
+                .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(anomalyDetectionTrainer))
+                .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));
+
+            // Fit the binary classification pipeline.
+            Assert.Throws(() => anomalyDetectionPipeline.Fit(data));
+
+            // Create a model training an OVA trainer with a binary classifier.
+ var binaryclassificationTrainer = mlContext.BinaryClassification.Trainers.LogisticRegression( + new LogisticRegression.Options { NumberOfIterations = 10, NumberOfThreads = 1, }); + var binaryClassificationPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) + .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(binaryclassificationTrainer)) + .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")); + + // Fit the binary classification pipeline. + var binaryClassificationModel = binaryClassificationPipeline.Fit(data); + + // Transform the data + var binaryClassificationPredictions = binaryClassificationModel.Transform(data); + + // Evaluate the model. + var binaryClassificationMetrics = mlContext.MulticlassClassification.Evaluate(binaryClassificationPredictions); + + // Create a model training an OVA trainer with a clustering trainer. + var kmeansTrainer = mlContext.Clustering.Trainers.KMeans( + new KMeansPlusPlusTrainer.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); + + Assert.Throws(() => + mlContext.Transforms.Concatenate("Features", Iris.Features) + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) + .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(kmeansTrainer)) + .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"))); + + // Create a model training an OVA trainer with a multiclass classification trainer. + var multiclassTrainer = mlContext.MulticlassClassification.Trainers.LogisticRegression( + new MulticlassLogisticRegression.Options { NumberOfIterations = 10, NumberOfThreads = 1, }); + Assert.Throws(() => + mlContext.Transforms.Concatenate("Features", Iris.Features) + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) + .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(multiclassTrainer)) + .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"))); + + // Create a model training an OVA trainer with a ranking trainer. + var rankingTrainer = mlContext.Ranking.Trainers.FastTree( + new FastTreeRankingTrainer.Options { NumberOfTrees = 2, NumberOfThreads = 1, }); + var rankingPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) + .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(rankingTrainer)) + .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")); + + // Fit the invalid pipeline. + // Todo #XXX: Make this fail somehow. + var rankingModel = rankingPipeline.Fit(data); + + // Transform the data + var rankingPredictions = rankingModel.Transform(data); + + // Evaluate the model. + var rankingMetrics = mlContext.MulticlassClassification.Evaluate(rankingPredictions); + + // Create a model training an OVA trainer with a regressor. 
+ var regressionTrainer = mlContext.Regression.Trainers.PoissonRegression( + new PoissonRegression.Options { NumberOfIterations = 10, NumberOfThreads = 1, }); + var regressionPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) + .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(regressionTrainer)) + .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")); + + // Fit the invalid pipeline. + // Todo #XXX: Make this fail somehow. + var regressionModel = regressionPipeline.Fit(data); + + // Transform the data + var regressionPredictions = regressionModel.Transform(data); + + // Evaluate the model. + var regressionMetrics = mlContext.MulticlassClassification.Evaluate(regressionPredictions); + } + } +} \ No newline at end of file diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs deleted file mode 100644 index 6f909cef69..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Data; -using Microsoft.ML.RunTests; -using Microsoft.ML.Trainers; -using Microsoft.ML.Transforms; -using Xunit; - -namespace Microsoft.ML.Tests.Scenarios.Api -{ - public partial class ApiScenariosTests - { - /// - /// Meta-components: Meta-components (for example, components that themselves instantiate components) should not be booby-trapped. - /// When specifying what trainer OVA should use, a user will be able to specify any binary classifier. - /// If they specify a regression or multi-class classifier ideally that should be a compile error. - /// - [Fact] - public void Metacomponents() - { - var ml = new MLContext(); - var data = ml.Data.LoadFromTextFile(GetDataPath(TestDatasets.irisData.trainFilename), separatorChar: ','); - - var sdcaTrainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated( - new SdcaNonCalibratedBinaryTrainer.Options { MaximumNumberOfIterations = 100, Shuffle = true, NumberOfThreads = 1, }); - - var pipeline = new ColumnConcatenatingEstimator (ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") - .Append(ml.Transforms.Conversion.MapValueToKey("Label"), TransformerScope.TrainTest) - .Append(ml.MulticlassClassification.Trainers.OneVersusAll(sdcaTrainer)) - .Append(ml.Transforms.Conversion.MapKeyToValue(("PredictedLabel"))); - - var model = pipeline.Fit(data); - } - } -} \ No newline at end of file From 11b95756b49533158715ac62fb1af41ab88deb04 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Mon, 11 Mar 2019 19:09:01 -0700 Subject: [PATCH 2/8] Adding todo issue number. --- test/Microsoft.ML.Functional.Tests/Training.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs index f7b10331f7..d507580529 100644 --- a/test/Microsoft.ML.Functional.Tests/Training.cs +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -502,7 +502,7 @@ public void MetacomponentsFunctionAsExpectedOva() .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")); // Fit the invalid pipeline. 
- // Todo #XXX: Make this fail somehow. + // Todo #2920: Make this fail somehow. var rankingModel = rankingPipeline.Fit(data); // Transform the data @@ -521,7 +521,7 @@ public void MetacomponentsFunctionAsExpectedOva() .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")); // Fit the invalid pipeline. - // Todo #XXX: Make this fail somehow. + // Todo #2920: Make this fail somehow. var regressionModel = regressionPipeline.Fit(data); // Transform the data From b48c1fbf6835147e57e95a4f6298bd27ae6e33f9 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Tue, 12 Mar 2019 10:58:55 -0700 Subject: [PATCH 3/8] Addressing PR comments. --- .../Microsoft.ML.Functional.Tests/Training.cs | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs index d507580529..9022f5f6d9 100644 --- a/test/Microsoft.ML.Functional.Tests/Training.cs +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -31,7 +31,8 @@ public void CompareTrainerEvaluations() // Get the dataset. var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), - separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader, + separatorChar: TestDatasets.Sentiment.fileSeparator, + hasHeader: TestDatasets.Sentiment.fileHasHeader, allowQuoting: TestDatasets.Sentiment.allowQuoting); var trainTestSplit = mlContext.BinaryClassification.TrainTestSplit(data); var trainData = trainTestSplit.TrainSet; @@ -85,7 +86,8 @@ public void ContinueTrainingAveragePerceptron() // Get the dataset. var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), - separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader, + separatorChar: TestDatasets.Sentiment.fileSeparator, + hasHeader: TestDatasets.Sentiment.fileHasHeader, allowQuoting: TestDatasets.Sentiment.allowQuoting); // Create a transformation pipeline. @@ -128,7 +130,8 @@ public void ContinueTrainingFieldAwareFactorizationMachines() // Get the dataset. var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), - separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader, + separatorChar: TestDatasets.Sentiment.fileSeparator, + hasHeader: TestDatasets.Sentiment.fileHasHeader, allowQuoting: TestDatasets.Sentiment.allowQuoting); // Create a transformation pipeline. @@ -171,7 +174,8 @@ public void ContinueTrainingLinearSupportVectorMachine() // Get the dataset. var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), - separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader, + separatorChar: TestDatasets.Sentiment.fileSeparator, + hasHeader: TestDatasets.Sentiment.fileHasHeader, allowQuoting: TestDatasets.Sentiment.allowQuoting); // Create a transformation pipeline. @@ -214,7 +218,8 @@ public void ContinueTrainingLogisticRegression() // Get the dataset. var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), - separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader, + separatorChar: TestDatasets.Sentiment.fileSeparator, + hasHeader: TestDatasets.Sentiment.fileHasHeader, allowQuoting: TestDatasets.Sentiment.allowQuoting); // Create a transformation pipeline. 
@@ -256,8 +261,8 @@ public void ContinueTrainingLogisticRegressionMulticlass() var mlContext = new MLContext(seed: 1); var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.iris.trainFilename), - hasHeader: TestDatasets.iris.fileHasHeader, - separatorChar: TestDatasets.iris.fileSeparator); + hasHeader: TestDatasets.iris.fileHasHeader, + separatorChar: TestDatasets.iris.fileSeparator); // Create a training pipeline. var featurizationPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) @@ -306,7 +311,8 @@ public void ContinueTrainingOnlineGradientDescent() // Get the dataset. var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename), - separatorChar: TestDatasets.housing.fileSeparator, hasHeader: TestDatasets.housing.fileHasHeader); + separatorChar: TestDatasets.housing.fileSeparator, + hasHeader: TestDatasets.housing.fileHasHeader); // Create a transformation pipeline. var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) @@ -349,7 +355,8 @@ public void ContinueTrainingLinearSymbolicStochasticGradientDescent() // Get the dataset. var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), - separatorChar: TestDatasets.Sentiment.fileSeparator, hasHeader: TestDatasets.Sentiment.fileHasHeader, + separatorChar: TestDatasets.Sentiment.fileSeparator, + hasHeader: TestDatasets.Sentiment.fileHasHeader, allowQuoting: TestDatasets.Sentiment.allowQuoting); // Create a transformation pipeline. @@ -396,7 +403,8 @@ public void ContinueTrainingPoissonRegression() // Get the dataset. var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename), - separatorChar: TestDatasets.housing.fileSeparator, hasHeader: TestDatasets.housing.fileHasHeader); + separatorChar: TestDatasets.housing.fileSeparator, + hasHeader: TestDatasets.housing.fileHasHeader); // Create a transformation pipeline. var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) @@ -439,8 +447,8 @@ public void MetacomponentsFunctionAsExpectedOva() var mlContext = new MLContext(seed: 1); var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.iris.trainFilename), - hasHeader: TestDatasets.iris.fileHasHeader, - separatorChar: TestDatasets.iris.fileSeparator); + hasHeader: TestDatasets.iris.fileHasHeader, + separatorChar: TestDatasets.iris.fileSeparator); // Create a model training an OVA trainer with a binary classifier. var anomalyDetectionTrainer = mlContext.AnomalyDetection.Trainers.AnalyzeRandomizedPrincipalComponents(); From 9671001617467100684a55eaf3cf082082859789 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Tue, 12 Mar 2019 11:28:43 -0700 Subject: [PATCH 4/8] Rebasing and fixing to reflect changes in master. --- .../Microsoft.ML.Functional.Tests/Training.cs | 28 ++++++------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs index 9022f5f6d9..22b2bdb0c1 100644 --- a/test/Microsoft.ML.Functional.Tests/Training.cs +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -32,9 +32,9 @@ public void CompareTrainerEvaluations() // Get the dataset. 
var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), separatorChar: TestDatasets.Sentiment.fileSeparator, - hasHeader: TestDatasets.Sentiment.fileHasHeader, + hasHeader: TestDatasets.Sentiment.fileHasHeader, allowQuoting: TestDatasets.Sentiment.allowQuoting); - var trainTestSplit = mlContext.BinaryClassification.TrainTestSplit(data); + var trainTestSplit = mlContext.Data.TrainTestSplit(data); var trainData = trainTestSplit.TrainSet; var testData = trainTestSplit.TestSet; @@ -266,6 +266,7 @@ public void ContinueTrainingLogisticRegressionMulticlass() // Create a training pipeline. var featurizationPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) .AppendCacheCheckpoint(mlContext); var trainer = mlContext.MulticlassClassification.Trainers.LogisticRegression( @@ -467,8 +468,7 @@ public void MetacomponentsFunctionAsExpectedOva() var binaryClassificationPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) - .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(binaryclassificationTrainer)) - .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")); + .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(binaryclassificationTrainer)); // Fit the binary classification pipeline. var binaryClassificationModel = binaryClassificationPipeline.Fit(data); @@ -503,6 +503,7 @@ public void MetacomponentsFunctionAsExpectedOva() // Create a model training an OVA trainer with a ranking trainer. var rankingTrainer = mlContext.Ranking.Trainers.FastTree( new FastTreeRankingTrainer.Options { NumberOfTrees = 2, NumberOfThreads = 1, }); + // Todo #2920: Make this fail somehow. var rankingPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) @@ -510,18 +511,12 @@ public void MetacomponentsFunctionAsExpectedOva() .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")); // Fit the invalid pipeline. - // Todo #2920: Make this fail somehow. - var rankingModel = rankingPipeline.Fit(data); - - // Transform the data - var rankingPredictions = rankingModel.Transform(data); - - // Evaluate the model. - var rankingMetrics = mlContext.MulticlassClassification.Evaluate(rankingPredictions); + Assert.Throws(() => rankingPipeline.Fit(data)); // Create a model training an OVA trainer with a regressor. var regressionTrainer = mlContext.Regression.Trainers.PoissonRegression( new PoissonRegression.Options { NumberOfIterations = 10, NumberOfThreads = 1, }); + // Todo #2920: Make this fail somehow. var regressionPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) @@ -529,14 +524,7 @@ public void MetacomponentsFunctionAsExpectedOva() .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")); // Fit the invalid pipeline. - // Todo #2920: Make this fail somehow. - var regressionModel = regressionPipeline.Fit(data); - - // Transform the data - var regressionPredictions = regressionModel.Transform(data); - - // Evaluate the model. 
- var regressionMetrics = mlContext.MulticlassClassification.Evaluate(regressionPredictions); + Assert.Throws(() => regressionPipeline.Fit(data)); } } } \ No newline at end of file From 1950ef8ee67ee330baf6d8df1781370f81aa9ec0 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Tue, 12 Mar 2019 13:14:06 -0700 Subject: [PATCH 5/8] Addressing changes to Master. --- test/Microsoft.ML.Functional.Tests/Training.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs index 22b2bdb0c1..5cc9451868 100644 --- a/test/Microsoft.ML.Functional.Tests/Training.cs +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -227,7 +227,7 @@ public void ContinueTrainingLogisticRegression() .AppendCacheCheckpoint(mlContext); var trainer = mlContext.BinaryClassification.Trainers.LogisticRegression( - new LogisticRegression.Options { NumberOfThreads = 1, NumberOfIterations = 10 }); + new LogisticRegression.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }); // Fit the data transformation pipeline. var featurization = featurizationPipeline.Fit(data); @@ -270,7 +270,7 @@ public void ContinueTrainingLogisticRegressionMulticlass() .AppendCacheCheckpoint(mlContext); var trainer = mlContext.MulticlassClassification.Trainers.LogisticRegression( - new MulticlassLogisticRegression.Options { NumberOfThreads = 1, NumberOfIterations = 10 }); + new MulticlassLogisticRegression.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }); // Fit the data transformation pipeline. var featurization = featurizationPipeline.Fit(data); @@ -413,7 +413,7 @@ public void ContinueTrainingPoissonRegression() .AppendCacheCheckpoint(mlContext); var trainer = mlContext.Regression.Trainers.PoissonRegression( - new PoissonRegression.Options { NumberOfThreads = 1, NumberOfIterations = 100 }); + new PoissonRegression.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 100 }); // Fit the data transformation pipeline. var featurization = featurizationPipeline.Fit(data); @@ -464,7 +464,7 @@ public void MetacomponentsFunctionAsExpectedOva() // Create a model training an OVA trainer with a binary classifier. var binaryclassificationTrainer = mlContext.BinaryClassification.Trainers.LogisticRegression( - new LogisticRegression.Options { NumberOfIterations = 10, NumberOfThreads = 1, }); + new LogisticRegression.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); var binaryClassificationPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) @@ -492,7 +492,7 @@ public void MetacomponentsFunctionAsExpectedOva() // Create a model training an OVA trainer with a multiclass classification trainer. var multiclassTrainer = mlContext.MulticlassClassification.Trainers.LogisticRegression( - new MulticlassLogisticRegression.Options { NumberOfIterations = 10, NumberOfThreads = 1, }); + new MulticlassLogisticRegression.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); Assert.Throws(() => mlContext.Transforms.Concatenate("Features", Iris.Features) .AppendCacheCheckpoint(mlContext) @@ -515,7 +515,7 @@ public void MetacomponentsFunctionAsExpectedOva() // Create a model training an OVA trainer with a regressor. 
var regressionTrainer = mlContext.Regression.Trainers.PoissonRegression( - new PoissonRegression.Options { NumberOfIterations = 10, NumberOfThreads = 1, }); + new PoissonRegression.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); // Todo #2920: Make this fail somehow. var regressionPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) .AppendCacheCheckpoint(mlContext) From 4975a7e972d1c2c57a53d15b08edbe5882fbe73c Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Wed, 13 Mar 2019 12:55:20 -0700 Subject: [PATCH 6/8] Addressing PR comments. --- test/Microsoft.ML.Functional.Tests/Training.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs index 5cc9451868..9e493ff1a7 100644 --- a/test/Microsoft.ML.Functional.Tests/Training.cs +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -124,7 +124,7 @@ public void ContinueTrainingAveragePerceptron() /// Training: Models can be trained starting from an existing model. /// [Fact] - public void ContinueTrainingFieldAwareFactorizationMachines() + public void ContinueTrainingFieldAwareFactorizationMachine() { var mlContext = new MLContext(seed: 1); From 270e6262e695479f3f66f2c2850eb124547af780 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Wed, 13 Mar 2019 15:19:10 -0700 Subject: [PATCH 7/8] Fixing due to breaking changes in master. --- .../Microsoft.ML.Functional.Tests/Training.cs | 80 +++++++++---------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs index 9e493ff1a7..81a6fc3ccc 100644 --- a/test/Microsoft.ML.Functional.Tests/Training.cs +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -43,14 +43,14 @@ public void CompareTrainerEvaluations() .AppendCacheCheckpoint(mlContext); // Create a selection of learners. - var sdcaTrainer = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent( - new SdcaBinaryTrainer.Options { NumberOfThreads = 1 }); + var sdcaTrainer = mlContext.BinaryClassification.Trainers.SdcaCalibrated( + new SdcaCalibratedBinaryClassificationTrainer.Options { NumberOfThreads = 1 }); var fastTreeTrainer = mlContext.BinaryClassification.Trainers.FastTree( new FastTreeBinaryClassificationTrainer.Options { NumberOfThreads = 1 }); var ffmTrainer = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine( - new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options { }); + new FieldAwareFactorizationMachineTrainer.Options { }); // Fit the data transformation pipeline. var featurization = featurizationPipeline.Fit(trainData); @@ -139,7 +139,7 @@ public void ContinueTrainingFieldAwareFactorizationMachine() .AppendCacheCheckpoint(mlContext); var trainer = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine( - new FieldAwareFactorizationMachineBinaryClassificationTrainer.Options { NumberOfIterations = 100 }); + new FieldAwareFactorizationMachineTrainer.Options { NumberOfIterations = 100 }); // Fit the data transformation pipeline. 
var featurization = featurizationPipeline.Fit(data); @@ -182,7 +182,7 @@ public void ContinueTrainingLinearSupportVectorMachine() var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") .AppendCacheCheckpoint(mlContext); - var trainer = mlContext.BinaryClassification.Trainers.LinearSupportVectorMachines( + var trainer = mlContext.BinaryClassification.Trainers.LinearSvm( new LinearSvmTrainer.Options { NumberOfIterations = 1 }); // Fit the data transformation pipeline. @@ -227,7 +227,7 @@ public void ContinueTrainingLogisticRegression() .AppendCacheCheckpoint(mlContext); var trainer = mlContext.BinaryClassification.Trainers.LogisticRegression( - new LogisticRegression.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }); + new LogisticRegressionBinaryClassificationTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }); // Fit the data transformation pipeline. var featurization = featurizationPipeline.Fit(data); @@ -270,7 +270,7 @@ public void ContinueTrainingLogisticRegressionMulticlass() .AppendCacheCheckpoint(mlContext); var trainer = mlContext.MulticlassClassification.Trainers.LogisticRegression( - new MulticlassLogisticRegression.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }); + new LogisticRegressionMulticlassClassificationTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }); // Fit the data transformation pipeline. var featurization = featurizationPipeline.Fit(data); @@ -350,26 +350,22 @@ public void ContinueTrainingOnlineGradientDescent() /// Training: Models can be trained starting from an existing model. /// [Fact] - public void ContinueTrainingLinearSymbolicStochasticGradientDescent() + public void ContinueTrainingPoissonRegression() { var mlContext = new MLContext(seed: 1); // Get the dataset. - var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), - separatorChar: TestDatasets.Sentiment.fileSeparator, - hasHeader: TestDatasets.Sentiment.fileHasHeader, - allowQuoting: TestDatasets.Sentiment.allowQuoting); + var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename), + separatorChar: TestDatasets.housing.fileSeparator, + hasHeader: TestDatasets.housing.fileHasHeader); // Create a transformation pipeline. - var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") + var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) + .Append(mlContext.Transforms.Normalize("Features")) .AppendCacheCheckpoint(mlContext); - var trainer = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent( - new SymbolicStochasticGradientDescentClassificationTrainer.Options - { - NumberOfThreads = 1, - NumberOfIterations = 10 - }); + var trainer = mlContext.Regression.Trainers.PoissonRegression( + new PoissonRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 100 }); // Fit the data transformation pipeline. var featurization = featurizationPipeline.Fit(data); @@ -377,15 +373,15 @@ public void ContinueTrainingLinearSymbolicStochasticGradientDescent() // Fit the first trainer. var firstModel = trainer.Fit(featurizedData); - var firstModelWeights = firstModel.Model.SubModel.Weights; + var firstModelWeights = firstModel.Model.Weights; // Fist the first trainer again. 
var firstModelPrime = trainer.Fit(featurizedData); - var firstModelWeightsPrime = firstModel.Model.SubModel.Weights; + var firstModelWeightsPrime = firstModel.Model.Weights; // Fit the second trainer. - var secondModel = trainer.Fit(featurizedData, firstModel.Model.SubModel); - var secondModelWeights = secondModel.Model.SubModel.Weights; + var secondModel = trainer.Fit(featurizedData, firstModel.Model); + var secondModelWeights = secondModel.Model.Weights; // Validate that continued training occurred. // Training from the same initial condition, same seed should create the same model. @@ -398,22 +394,26 @@ public void ContinueTrainingLinearSymbolicStochasticGradientDescent() /// Training: Models can be trained starting from an existing model. /// [Fact] - public void ContinueTrainingPoissonRegression() + public void ContinueTrainingSymbolicStochasticGradientDescent() { var mlContext = new MLContext(seed: 1); // Get the dataset. - var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename), - separatorChar: TestDatasets.housing.fileSeparator, - hasHeader: TestDatasets.housing.fileHasHeader); + var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), + separatorChar: TestDatasets.Sentiment.fileSeparator, + hasHeader: TestDatasets.Sentiment.fileHasHeader, + allowQuoting: TestDatasets.Sentiment.allowQuoting); // Create a transformation pipeline. - var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + var featurizationPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") .AppendCacheCheckpoint(mlContext); - var trainer = mlContext.Regression.Trainers.PoissonRegression( - new PoissonRegression.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 100 }); + var trainer = mlContext.BinaryClassification.Trainers.SymbolicSgd( + new SymbolicSgdTrainer.Options + { + NumberOfThreads = 1, + NumberOfIterations = 10 + }); // Fit the data transformation pipeline. var featurization = featurizationPipeline.Fit(data); @@ -421,15 +421,15 @@ public void ContinueTrainingPoissonRegression() // Fit the first trainer. var firstModel = trainer.Fit(featurizedData); - var firstModelWeights = firstModel.Model.Weights; + var firstModelWeights = firstModel.Model.SubModel.Weights; // Fist the first trainer again. var firstModelPrime = trainer.Fit(featurizedData); - var firstModelWeightsPrime = firstModel.Model.Weights; + var firstModelWeightsPrime = firstModel.Model.SubModel.Weights; // Fit the second trainer. - var secondModel = trainer.Fit(featurizedData, firstModel.Model); - var secondModelWeights = secondModel.Model.Weights; + var secondModel = trainer.Fit(featurizedData, firstModel.Model.SubModel); + var secondModelWeights = secondModel.Model.SubModel.Weights; // Validate that continued training occurred. // Training from the same initial condition, same seed should create the same model. @@ -452,7 +452,7 @@ public void MetacomponentsFunctionAsExpectedOva() separatorChar: TestDatasets.iris.fileSeparator); // Create a model training an OVA trainer with a binary classifier. 
- var anomalyDetectionTrainer = mlContext.AnomalyDetection.Trainers.AnalyzeRandomizedPrincipalComponents(); + var anomalyDetectionTrainer = mlContext.AnomalyDetection.Trainers.RandomizedPca(); var anomalyDetectionPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) @@ -464,7 +464,7 @@ public void MetacomponentsFunctionAsExpectedOva() // Create a model training an OVA trainer with a binary classifier. var binaryclassificationTrainer = mlContext.BinaryClassification.Trainers.LogisticRegression( - new LogisticRegression.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); + new LogisticRegressionBinaryClassificationTrainer.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); var binaryClassificationPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) @@ -481,7 +481,7 @@ public void MetacomponentsFunctionAsExpectedOva() // Create a model training an OVA trainer with a clustering trainer. var kmeansTrainer = mlContext.Clustering.Trainers.KMeans( - new KMeansPlusPlusTrainer.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); + new KMeansTrainer.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); Assert.Throws(() => mlContext.Transforms.Concatenate("Features", Iris.Features) @@ -492,7 +492,7 @@ public void MetacomponentsFunctionAsExpectedOva() // Create a model training an OVA trainer with a multiclass classification trainer. var multiclassTrainer = mlContext.MulticlassClassification.Trainers.LogisticRegression( - new MulticlassLogisticRegression.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); + new LogisticRegressionMulticlassClassificationTrainer.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); Assert.Throws(() => mlContext.Transforms.Concatenate("Features", Iris.Features) .AppendCacheCheckpoint(mlContext) @@ -515,7 +515,7 @@ public void MetacomponentsFunctionAsExpectedOva() // Create a model training an OVA trainer with a regressor. var regressionTrainer = mlContext.Regression.Trainers.PoissonRegression( - new PoissonRegression.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); + new PoissonRegressionTrainer.Options { MaximumNumberOfIterations = 10, NumberOfThreads = 1, }); // Todo #2920: Make this fail somehow. var regressionPipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) .AppendCacheCheckpoint(mlContext) From 821e94f20dceaaf16643c0f14d06eddd0389f5e6 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Wed, 13 Mar 2019 15:22:12 -0700 Subject: [PATCH 8/8] Updating to use new FFM API. 
--- test/Microsoft.ML.Functional.Tests/Training.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs index 81a6fc3ccc..c1162e7978 100644 --- a/test/Microsoft.ML.Functional.Tests/Training.cs +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -49,8 +49,7 @@ public void CompareTrainerEvaluations() var fastTreeTrainer = mlContext.BinaryClassification.Trainers.FastTree( new FastTreeBinaryClassificationTrainer.Options { NumberOfThreads = 1 }); - var ffmTrainer = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine( - new FieldAwareFactorizationMachineTrainer.Options { }); + var ffmTrainer = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(); // Fit the data transformation pipeline. var featurization = featurizationPipeline.Fit(trainData);