From 8dd53e13d24868e739a201a7f35939a0f82a02c5 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Thu, 28 Feb 2019 17:42:14 -0800 Subject: [PATCH] Api clean up for LightGBM. The cleanup includes: - Changing all abbreviated parameters to full names (e.g. numThreads->NumberOfThreads) - Updating column parameters to have Name if that's what they represent (LabelColumn->LabelColumnName). - Updated baseline files to reflect these changes which are semantical and should not have any computational difference. This fixes #2618 --- .../Dynamic/Trainers/Ranking/LightGbm.cs | 6 +- .../Trainers/Ranking/LightGbmWithOptions.cs | 6 +- .../Dynamic/Trainers/Regression/LightGbm.cs | 4 +- .../Regression/LightGbmWithOptions.cs | 4 +- .../Static/LightGBMRegression.cs | 4 +- .../LightGbmStaticExtensions.cs | 65 ++++---- .../LightGbmArguments.cs | 82 +++++----- .../LightGbmBinaryTrainer.cs | 24 +-- src/Microsoft.ML.LightGBM/LightGbmCatalog.cs | 56 +++---- .../LightGbmMulticlassTrainer.cs | 36 ++--- .../LightGbmRankingTrainer.cs | 41 ++--- .../LightGbmRegressionTrainer.cs | 28 ++-- .../LightGbmTrainerBase.cs | 53 +++---- src/Microsoft.ML.LightGBM/doc.xml | 24 +-- .../Standard/Online/AveragedPerceptron.cs | 12 +- .../Common/EntryPoints/core_manifest.json | 146 +++++++++--------- ...LightGBMDart-CV-breast-cancer.dart-out.txt | 4 +- ...MDart-TrainTest-breast-cancer.dart-out.txt | 2 +- ...LightGBMGoss-CV-breast-cancer.goss-out.txt | 4 +- ...MGoss-TrainTest-breast-cancer.goss-out.txt | 2 +- .../LightGBM-TrainTest-breast-cancer-out.txt | 2 +- .../LightGBMMC/LightGBMMC-CV-iris.key-out.txt | 8 +- .../LightGBMMC-CV-iris.keyU404-out.txt | 8 +- .../LightGBMMC-TrainTest-iris.key-out.txt | 4 +- .../LightGBMMC-TrainTest-iris.keyU404-out.txt | 4 +- ...MReg-CV-generatedRegressionDataset-out.txt | 4 +- ...ainTest-generatedRegressionDataset-out.txt | 2 +- ...-CV-generatedRegressionDataset.MAE-out.txt | 4 +- ...est-generatedRegressionDataset.MAE-out.txt | 2 +- ...CV-generatedRegressionDataset.RMSE-out.txt | 4 
+- ...st-generatedRegressionDataset.RMSE-out.txt | 2 +- .../TestPredictors.cs | 4 +- .../Training.cs | 4 +- test/Microsoft.ML.Tests/OnnxConversionTest.cs | 2 +- .../TensorflowTests.cs | 4 +- .../TrainerEstimators/TreeEstimators.cs | 18 +-- 36 files changed, 342 insertions(+), 337 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs index c3bd9d604e..576b98cbee 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs @@ -20,10 +20,10 @@ public static void Example() // Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations. var pipeline = mlContext.Ranking.Trainers.LightGbm( - numLeaves: 4, - minDataPerLeaf: 10, + leafCount: 4, + minimumDataPerLeaf: 10, learningRate: 0.1, - numBoostRound: 2); + numberOfIterations: 2); // Fit this Pipeline to the Training Data. 
var model = pipeline.Fit(split.TrainSet); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs index dc898fb4d3..235d30e078 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs @@ -23,10 +23,10 @@ public static void Example() var pipeline = mlContext.Ranking.Trainers.LightGbm( new Options { - NumLeaves = 4, - MinDataPerLeaf = 10, + NumberOfLeaves = 4, + MinimumDataPerLeaf = 10, LearningRate = 0.1, - NumBoostRound = 2, + NumberOfIterations = 2, Booster = new TreeBooster.Options { FeatureFraction = 0.9 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs index ce9e27a0fc..5f43a78c25 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs @@ -35,8 +35,8 @@ public static void Example() var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) .Append(mlContext.Regression.Trainers.LightGbm( labelColumnName: labelName, - numLeaves: 4, - minDataPerLeaf: 6, + leafCount: 4, + minimumDataPerLeaf: 6, learningRate: 0.001)); // Fit this pipeline to the training data. 
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs index e93eeb3f96..c2255554fb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs @@ -39,8 +39,8 @@ public static void Example() .Append(mlContext.Regression.Trainers.LightGbm(new Options { LabelColumnName = labelName, - NumLeaves = 4, - MinDataPerLeaf = 6, + NumberOfLeaves = 4, + MinimumDataPerLeaf = 6, LearningRate = 0.001, Booster = new GossBooster.Options { diff --git a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs index 61225fe1e9..aa2f2e65fc 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs @@ -38,8 +38,8 @@ public static void LightGbmRegression() .Append(r => (r.label, score: mlContext.Regression.Trainers.LightGbm( r.label, r.features, - numLeaves: 4, - minDataPerLeaf: 6, + numberOfLeaves: 4, + minimumDataPerLeaf: 6, learningRate: 0.001, onFit: p => pred = p) ) diff --git a/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs b/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs index 581bbc0b59..9fc5c33c95 100644 --- a/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs +++ b/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs @@ -21,10 +21,10 @@ public static class LightGbmStaticExtensions /// The label column. /// The features column. /// The weights column. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. 
+ /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// Number of iterations. /// A delegate that is called every time the /// method is called on the /// instance created out of this. This delegate will receive @@ -39,19 +39,19 @@ public static class LightGbmStaticExtensions /// public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers catalog, Scalar label, Vector features, Scalar weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + int? numberOfLeaves = null, + int? minimumDataPerLeaf = null, double? learningRate = null, - int numBoostRound = Options.Defaults.NumBoostRound, + int numberOfIterations = Options.Defaults.NumberOfIterations, Action onFit = null) { - CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); + CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit); var rec = new TrainerEstimatorReconciler.Regression( (env, labelName, featuresName, weightsName) => { - var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numLeaves, - minDataPerLeaf, learningRate, numBoostRound); + var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numberOfLeaves, + minimumDataPerLeaf, learningRate, numberOfIterations); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); return trainer; @@ -122,11 +122,13 @@ public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers c /// ]]> /// public static (Scalar score, Scalar probability, Scalar predictedLabel) LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, - Scalar label, Vector features, Scalar weights = null, + Scalar label, + Vector features, + Scalar weights = null, int? numLeaves = null, int? minDataPerLeaf = null, double? 
learningRate = null, - int numBoostRound = Options.Defaults.NumBoostRound, + int numBoostRound = Options.Defaults.NumberOfIterations, Action> onFit = null) { CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); @@ -194,9 +196,9 @@ public static (Scalar score, Scalar probability, Scalar pred /// The features column. /// The groupId column. /// The weights column. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// Number of iterations. + /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. /// A delegate that is called every time the /// method is called on the @@ -206,21 +208,24 @@ public static (Scalar score, Scalar probability, Scalar pred /// The set of output columns including in order the predicted binary classification score (which will range /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label. public static Scalar LightGbm(this RankingCatalog.RankingTrainers catalog, - Scalar label, Vector features, Key groupId, Scalar weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + Scalar label, + Vector features, + Key groupId, + Scalar weights = null, + int? numberOfLeaves = null, + int? minimumDataPerLeaf = null, double? 
learningRate = null, - int numBoostRound = Options.Defaults.NumBoostRound, + int numberOfIterations = Options.Defaults.NumberOfIterations, Action onFit = null) { - CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); + CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit); Contracts.CheckValue(groupId, nameof(groupId)); var rec = new TrainerEstimatorReconciler.Ranker( (env, labelName, featuresName, groupIdName, weightsName) => { - var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numLeaves, - minDataPerLeaf, learningRate, numBoostRound); + var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numberOfLeaves, + minimumDataPerLeaf, learningRate, numberOfIterations); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); @@ -279,10 +284,10 @@ public static Scalar LightGbm(this RankingCatalog.RankingTrainers c /// The label, or dependent variable. /// The features, or independent variables. /// The weights column. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// Number of iterations. /// A delegate that is called every time the /// method is called on the /// instance created out of this. This delegate will receive @@ -301,19 +306,19 @@ public static (Vector score, Key predictedLabel) Key label, Vector features, Scalar weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + int? numberOfLeaves = null, + int? minimumDataPerLeaf = null, double? 
learningRate = null, - int numBoostRound = Options.Defaults.NumBoostRound, + int numberOfIterations = Options.Defaults.NumberOfIterations, Action onFit = null) { - CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); + CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit); var rec = new TrainerEstimatorReconciler.MulticlassClassifier( (env, labelName, featuresName, weightsName) => { - var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numLeaves, - minDataPerLeaf, learningRate, numBoostRound); + var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numberOfLeaves, + minimumDataPerLeaf, learningRate, numberOfIterations); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index 0b635bfaed..047c19b88d 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -66,14 +66,14 @@ internal virtual void UpdateParameters(Dictionary res) if (attribute == null) continue; - res[GetArgName(field.Name)] = field.GetValue(BoosterParameterOptions); + res[GetOptionName(field.Name)] = field.GetValue(BoosterParameterOptions); } } void IBoosterParameter.UpdateParameters(Dictionary res) => UpdateParameters(res); } - private static string GetArgName(string name) + private static string GetOptionName(string name) { StringBuilder strBuf = new StringBuilder(); bool first = true; @@ -96,7 +96,7 @@ private static string GetArgName(string name) [BestFriend] internal static class Defaults { - public const int NumBoostRound = 100; + public const int NumberOfIterations = 100; } public sealed class TreeBooster : BoosterParameter @@ -107,7 +107,7 @@ public sealed class TreeBooster : BoosterParameter 
[TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Traditional Gradient Boosting Decision Tree.")] public class Options : ISupportBoosterParameterFactory { - [Argument(ArgumentType.AtMostOnce, HelpText = "Use for binary classification when classes are not balanced.", ShortName = "us")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Use for binary classification when training data is not balanced.", ShortName = "us")] public bool UnbalancedSets = false; [Argument(ArgumentType.AtMostOnce, @@ -129,7 +129,7 @@ public class Options : ISupportBoosterParameterFactory public double MinChildWeight = 0.1; [Argument(ArgumentType.AtMostOnce, - HelpText = "Subsample frequency. 0 means no subsample. " + HelpText = "Subsample frequency for bagging. 0 means no subsample. " + "If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.")] [TlcModule.Range(Min = 0, Max = int.MaxValue)] public int SubsampleFreq = 0; @@ -179,7 +179,9 @@ internal TreeBooster(Options options) Contracts.CheckUserArg(BoosterParameterOptions.MinChildWeight >= 0, nameof(BoosterParameterOptions.MinChildWeight), "must be >= 0."); Contracts.CheckUserArg(BoosterParameterOptions.Subsample > 0 && BoosterParameterOptions.Subsample <= 1, nameof(BoosterParameterOptions.Subsample), "must be in (0,1]."); Contracts.CheckUserArg(BoosterParameterOptions.FeatureFraction > 0 && BoosterParameterOptions.FeatureFraction <= 1, nameof(BoosterParameterOptions.FeatureFraction), "must be in (0,1]."); - Contracts.CheckUserArg(BoosterParameterOptions.ScalePosWeight > 0 && BoosterParameterOptions.ScalePosWeight <= 1, nameof(BoosterParameterOptions.ScalePosWeight), "must be in (0,1]."); + Contracts.CheckUserArg(BoosterParameterOptions.RegLambda >= 0, nameof(BoosterParameterOptions.RegLambda), "must be >= 0."); + Contracts.CheckUserArg(BoosterParameterOptions.RegAlpha >= 0, nameof(BoosterParameterOptions.RegAlpha), "must be >= 0."); + 
Contracts.CheckUserArg(BoosterParameterOptions.ScalePosWeight > 0, nameof(BoosterParameterOptions.ScalePosWeight), "must be >= 0."); } internal override void UpdateParameters(Dictionary res) @@ -197,15 +199,15 @@ public sealed class DartBooster : BoosterParameter [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Dropouts meet Multiple Additive Regresion Trees. See https://arxiv.org/abs/1505.01866")] public sealed class Options : TreeBooster.Options { - [Argument(ArgumentType.AtMostOnce, HelpText = "Drop ratio for trees. Range:(0,1).")] + [Argument(ArgumentType.AtMostOnce, HelpText = "The drop ratio for trees. Range:(0,1).")] [TlcModule.Range(Inf = 0.0, Max = 1.0)] public double DropRate = 0.1; - [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of dropped tree in a boosting round.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of dropped tree in a boosting round.")] [TlcModule.Range(Inf = 0, Max = int.MaxValue)] public int MaxDrop = 1; - [Argument(ArgumentType.AtMostOnce, HelpText = "Probability for not perform dropping in a boosting round.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Probability for not dropping in a boosting round.")] [TlcModule.Range(Inf = 0.0, Max = 1.0)] public double SkipDrop = 0.5; @@ -222,7 +224,6 @@ internal DartBooster(Options options) : base(options) { Contracts.CheckUserArg(BoosterParameterOptions.DropRate > 0 && BoosterParameterOptions.DropRate < 1, nameof(BoosterParameterOptions.DropRate), "must be in (0,1)."); - Contracts.CheckUserArg(BoosterParameterOptions.MaxDrop > 0, nameof(BoosterParameterOptions.MaxDrop), "must be > 0."); Contracts.CheckUserArg(BoosterParameterOptions.SkipDrop >= 0 && BoosterParameterOptions.SkipDrop < 1, nameof(BoosterParameterOptions.SkipDrop), "must be in [0,1)."); } @@ -241,14 +242,11 @@ public sealed class GossBooster : BoosterParameter [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Gradient-based One-Side Sampling.")] public 
sealed class Options : TreeBooster.Options { - [Argument(ArgumentType.AtMostOnce, - HelpText = "Retain ratio for large gradient instances.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Retain ratio for large gradient instances.")] [TlcModule.Range(Inf = 0.0, Max = 1.0)] public double TopRate = 0.2; - [Argument(ArgumentType.AtMostOnce, - HelpText = - "Retain ratio for small gradient instances.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Retain ratio for small gradient instances.")] [TlcModule.Range(Inf = 0.0, Max = 1.0)] public double OtherRate = 0.1; @@ -287,7 +285,7 @@ public enum EvalMetricType [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations.", SortOrder = 1, ShortName = "iter")] [TGUI(Label = "Number of boosting iterations", SuggestedSweeps = "10,20,50,100,150,200")] [TlcModule.SweepableDiscreteParam("NumBoostRound", new object[] { 10, 20, 50, 100, 150, 200 })] - public int NumBoostRound = Defaults.NumBoostRound; + public int NumberOfIterations = Defaults.NumberOfIterations; [Argument(ArgumentType.AtMostOnce, HelpText = "Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1].", @@ -300,37 +298,37 @@ public enum EvalMetricType SortOrder = 2, ShortName = "nl", NullName = "")] [TGUI(Description = "The maximum number of leaves per tree", SuggestedSweeps = "2-128;log;inc:4")] [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, isLogScale: true, stepSize: 4)] - public int? NumLeaves; + public int? NumberOfLeaves; [Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of instances needed in a child.", SortOrder = 2, ShortName = "mil", NullName = "")] [TGUI(Label = "Min Documents In Leaves", SuggestedSweeps = "1,10,20,50 ")] [TlcModule.SweepableDiscreteParamAttribute("MinDataPerLeaf", new object[] { 1, 10, 20, 50 })] - public int? MinDataPerLeaf; + public int? 
MinimumDataPerLeaf; - [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bucket bin for features.", ShortName = "mb")] - public int MaxBin = 255; + [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of bucket bin for features.", ShortName = "mb")] + public int MaximumBin = 255; [Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3)] public ISupportBoosterParameterFactory Booster = new TreeBooster.Options(); [Argument(ArgumentType.AtMostOnce, HelpText = "Verbose", ShortName = "v")] - public bool VerboseEval = false; + public bool Verbose = false; [Argument(ArgumentType.AtMostOnce, HelpText = "Printing running messages.")] public bool Silent = true; [Argument(ArgumentType.AtMostOnce, HelpText = "Number of parallel threads used to run LightGBM.", ShortName = "nt")] - public int? NThread; + public int? NumberOfThreads; [Argument(ArgumentType.AtMostOnce, HelpText = "Evaluation metrics.", ShortName = "em")] - public EvalMetricType EvalMetric = EvalMetricType.DefaultMetric; + public EvalMetricType EvaluationMetric = EvalMetricType.DefaultMetric; [Argument(ArgumentType.AtMostOnce, HelpText = "Use softmax loss for the multi classification.")] [TlcModule.SweepableDiscreteParam("UseSoftmax", new object[] { true, false })] - public bool? UseSoftmax; + public bool? UseSoftMaximum; [Argument(ArgumentType.AtMostOnce, HelpText = "Rounds of early stopping, 0 will disable it.", ShortName = "es")] @@ -350,31 +348,31 @@ public enum EvalMetricType [Argument(ArgumentType.AtMostOnce, HelpText = "Enable categorical split or not.", ShortName = "cat")] [TlcModule.SweepableDiscreteParam("UseCat", new object[] { true, false })] - public bool? UseCat; + public bool? 
UseCategoricalSplit; - [Argument(ArgumentType.AtMostOnce, HelpText = "Enable missing value auto infer or not.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Enable special handling of missing value or not.")] [TlcModule.SweepableDiscreteParam("UseMissing", new object[] { true, false })] public bool UseMissing = false; - [Argument(ArgumentType.AtMostOnce, HelpText = "Min number of instances per categorical group.", ShortName = "mdpg")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of instances per categorical group.", ShortName = "mdpg")] [TlcModule.Range(Inf = 0, Max = int.MaxValue)] [TlcModule.SweepableDiscreteParam("MinDataPerGroup", new object[] { 10, 50, 100, 200 })] - public int MinDataPerGroup = 100; + public int MinimumDataPerGroup = 100; [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of categorical thresholds.", ShortName = "maxcat")] [TlcModule.Range(Inf = 0, Max = int.MaxValue)] [TlcModule.SweepableDiscreteParam("MaxCatThreshold", new object[] { 8, 16, 32, 64 })] - public int MaxCatThreshold = 32; + public int MaximumCategoricalThreshold = 32; [Argument(ArgumentType.AtMostOnce, HelpText = "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.")] [TlcModule.Range(Min = 0.0)] [TlcModule.SweepableDiscreteParam("CatSmooth", new object[] { 1, 10, 20 })] - public double CatSmooth = 10; + public double CategoricalSmoothing = 10; [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization for categorical split.")] [TlcModule.Range(Min = 0.0)] [TlcModule.SweepableDiscreteParam("CatL2", new object[] { 0.1, 0.5, 1, 5, 10 })] - public double CatL2 = 10; + public double L2Categorical = 10; [Argument(ArgumentType.AtMostOnce, HelpText = "Sets the random seed for LightGBM to use.")] public int? 
Seed; @@ -385,23 +383,23 @@ public enum EvalMetricType internal Dictionary ToDictionary(IHost host) { Contracts.CheckValue(host, nameof(host)); - Contracts.CheckUserArg(MaxBin > 0, nameof(MaxBin), "must be > 0."); + Contracts.CheckUserArg(MaximumBin > 0, nameof(MaximumBin), "must be > 0."); Contracts.CheckUserArg(Sigmoid > 0, nameof(Sigmoid), "must be > 0."); Dictionary res = new Dictionary(); var boosterParams = Booster.CreateComponent(host); boosterParams.UpdateParameters(res); - res[GetArgName(nameof(MaxBin))] = MaxBin; + res["max_bin"] = MaximumBin; res["verbose"] = Silent ? "-1" : "1"; - if (NThread.HasValue) - res["nthread"] = NThread.Value; + if (NumberOfThreads.HasValue) + res["nthread"] = NumberOfThreads.Value; res["seed"] = (Seed.HasValue) ? Seed : host.Rand.Next(); string metric = null; - switch (EvalMetric) + switch (EvaluationMetric) { case EvalMetricType.DefaultMetric: break; @@ -424,18 +422,18 @@ internal Dictionary ToDictionary(IHost host) case EvalMetricType.Auc: case EvalMetricType.Ndcg: case EvalMetricType.Map: - metric = EvalMetric.ToString().ToLower(); + metric = EvaluationMetric.ToString().ToLower(); break; } if (!string.IsNullOrEmpty(metric)) res["metric"] = metric; res["sigmoid"] = Sigmoid; res["label_gain"] = CustomGains; - res[GetArgName(nameof(UseMissing))] = UseMissing; - res[GetArgName(nameof(MinDataPerGroup))] = MinDataPerGroup; - res[GetArgName(nameof(MaxCatThreshold))] = MaxCatThreshold; - res[GetArgName(nameof(CatSmooth))] = CatSmooth; - res[GetArgName(nameof(CatL2))] = CatL2; + res["use_missing"] = UseMissing; + res["min_data_per_group"] = MinimumDataPerGroup; + res["max_cat_threshold"] = MaximumCategoricalThreshold; + res["cat_smooth"] = CategoricalSmoothing; + res["cat_l2"] = L2Categorical; return res; } } diff --git a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs index 5df67201f8..21693a6882 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs +++ 
b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs @@ -102,22 +102,22 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, Options options) /// Initializes a new instance of /// /// The private instance of . - /// The name of The label column. - /// The name of the feature column. + /// The name of The label column. + /// The name of the feature column. /// The name for the column containing the initial weight. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// Number of iterations. internal LightGbmBinaryTrainer(IHostEnvironment env, - string labelColumn = DefaultColumnNames.Label, - string featureColumn = DefaultColumnNames.Features, + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, string weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + int? leafCount = null, + int? minimumDataPerLeaf = null, double? 
learningRate = null, - int numBoostRound = LightGBM.Options.Defaults.NumBoostRound) - : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound) + int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations) + : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumnName), featureColumnName, weights, null, leafCount, minimumDataPerLeaf, learningRate, numberOfIterations) { } @@ -138,7 +138,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) if (!(labelType is BooleanDataViewType || labelType is KeyType || labelType == NumberDataViewType.Single)) { throw ch.ExceptParam(nameof(data), - $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4."); + $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType.RawType}', but must be unsigned int, boolean or float."); } } diff --git a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs index 3e38fa248a..03988ad45f 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs @@ -20,10 +20,10 @@ public static class LightGbmExtensions /// The name of the label column. /// The name of the feature column. /// The name of the example weight column (optional). - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// The number of iterations to use. /// /// /// @@ -72,10 +72,10 @@ public static LightGbmRegressorTrainer LightGbm(this RegressionCatalog.Regressio /// The name of the label column. /// The name of the feature column. 
/// The name of the example weight column (optional). - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// The number of iterations to use. /// /// /// @@ -125,23 +125,23 @@ public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationCatalog.Bi /// The name of the feature column. /// The name of the group column. /// The name of the example weight column (optional). - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// The number of iterations to use. public static LightGbmRankingTrainer LightGbm(this RankingCatalog.RankingTrainers catalog, string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string rowGroupColumnName = DefaultColumnNames.GroupId, string exampleWeightColumnName = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + int? leafCount = null, + int? minimumDataPerLeaf = null, double? 
learningRate = null, - int numBoostRound = Options.Defaults.NumBoostRound) + int numberOfIterations = Options.Defaults.NumberOfIterations) { Contracts.CheckValue(catalog, nameof(catalog)); var env = CatalogUtils.GetEnvironment(catalog); - return new LightGbmRankingTrainer(env, labelColumnName, featureColumnName, rowGroupColumnName, exampleWeightColumnName, numLeaves, minDataPerLeaf, learningRate, numBoostRound); + return new LightGbmRankingTrainer(env, labelColumnName, featureColumnName, rowGroupColumnName, exampleWeightColumnName, leafCount, minimumDataPerLeaf, learningRate, numberOfIterations); } /// @@ -164,10 +164,10 @@ public static LightGbmRankingTrainer LightGbm(this RankingCatalog.RankingTrainer /// The name of the label column. /// The name of the feature column. /// The name of the example weight column (optional). - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// The number of iterations to use. /// /// /// diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs index 67e709c5c6..4ddfd845e4 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs @@ -43,22 +43,22 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, Options options) /// Initializes a new instance of /// /// The private instance of . - /// The name of The label column. - /// The name of the feature column. + /// The name of The label column. + /// The name of the feature column. /// The name for the column containing the initial weight. /// The number of leaves to use. - /// Number of iterations. 
/// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// The number of iterations to use. internal LightGbmMulticlassTrainer(IHostEnvironment env, - string labelColumn = DefaultColumnNames.Label, - string featureColumn = DefaultColumnNames.Features, + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, - int numBoostRound = LightGBM.Options.Defaults.NumBoostRound) - : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound) + int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations) + : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumnName), featureColumnName, weights, null, numLeaves, minDataPerLeaf, learningRate, numberOfIterations) { _numClass = -1; } @@ -110,7 +110,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) if (!(labelType is BooleanDataViewType || labelType is KeyType || labelType == NumberDataViewType.Single)) { throw ch.ExceptParam(nameof(data), - $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4."); + $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType.RawType}', but must be of unsigned int, boolean or float."); } } @@ -132,9 +132,9 @@ private protected override void ConvertNaNLabels(IChannel ch, RoleMappedData dat maxLabel = Math.Max(maxLabel, labelColumn); } } - ch.CheckParam(minLabel >= 0, nameof(data), "min labelColumn cannot be negative"); + ch.CheckParam(minLabel >= 0, nameof(data), "Minimum value in label column cannot be negative"); if (maxLabel >= _maxNumClass) - throw ch.ExceptParam(nameof(data), $"max labelColumn cannot exceed {_maxNumClass}"); + throw 
ch.ExceptParam(nameof(data), $"Maximum value {maxLabel} in label column exceeds {_maxNumClass}"); if (data.Schema.Label.Value.Type is KeyType keyType) { @@ -163,16 +163,16 @@ protected override void GetDefaultParameters(IChannel ch, int numRow, bool hasCa { base.GetDefaultParameters(ch, numRow, hasCategorical, totalCats, true); int numLeaves = (int)Options["num_leaves"]; - int minDataPerLeaf = LightGbmTrainerOptions.MinDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, _numClass); + int minDataPerLeaf = LightGbmTrainerOptions.MinimumDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, _numClass); Options["min_data_per_leaf"] = minDataPerLeaf; if (!hiddenMsg) { if (!LightGbmTrainerOptions.LearningRate.HasValue) ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.LearningRate) + " = " + Options["learning_rate"]); - if (!LightGbmTrainerOptions.NumLeaves.HasValue) - ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumLeaves) + " = " + numLeaves); - if (!LightGbmTrainerOptions.MinDataPerLeaf.HasValue) - ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinDataPerLeaf) + " = " + minDataPerLeaf); + if (!LightGbmTrainerOptions.NumberOfLeaves.HasValue) + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumberOfLeaves) + " = " + numLeaves); + if (!LightGbmTrainerOptions.MinimumDataPerLeaf.HasValue) + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinimumDataPerLeaf) + " = " + minDataPerLeaf); } } @@ -184,14 +184,14 @@ private protected override void CheckAndUpdateParametersBeforeTraining(IChannel Options["num_class"] = _numClass; bool useSoftmax = false; - if (LightGbmTrainerOptions.UseSoftmax.HasValue) - useSoftmax = LightGbmTrainerOptions.UseSoftmax.Value; + if (LightGbmTrainerOptions.UseSoftMaximum.HasValue) + useSoftmax = LightGbmTrainerOptions.UseSoftMaximum.Value; else { if (labels.Length >= _minDataToUseSoftmax) useSoftmax = true; - ch.Info("Auto-tuning parameters: " + 
nameof(LightGbmTrainerOptions.UseSoftmax) + " = " + useSoftmax); + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseSoftMaximum) + " = " + useSoftmax); } if (useSoftmax) diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs index 482a04f0d9..d9870205a3 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs @@ -22,7 +22,6 @@ namespace Microsoft.ML.LightGBM { - public sealed class LightGbmRankingModelParameters : TreeEnsembleModelParametersBasedOnRegressionTree { internal const string LoaderSignature = "LightGBMRankerExec"; @@ -89,26 +88,28 @@ internal LightGbmRankingTrainer(IHostEnvironment env, Options options) /// Initializes a new instance of /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. - /// The name of the column containing the group ID. - /// The name of the optional column containing the initial weights. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The name of the label column. + /// The name of the feature column. + /// The name of the column containing the group ID. + /// The name of the optional column containing the initial weights. + /// The number of leaves to use. /// The learning rate. + /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of iterations to use. internal LightGbmRankingTrainer(IHostEnvironment env, - string labelColumn = DefaultColumnNames.Label, - string featureColumn = DefaultColumnNames.Features, - string groupId = DefaultColumnNames.GroupId, - string weights = null, - int? numLeaves = null, - int? 
minDataPerLeaf = null, + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, + string groupIdColumnName = DefaultColumnNames.GroupId, + string weightsColumnName = null, + int? leafCount = null, + int? minimumDataPerLeaf = null, double? learningRate = null, - int numBoostRound = LightGBM.Options.Defaults.NumBoostRound) - : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weights, groupId, numLeaves, minDataPerLeaf, learningRate, numBoostRound) + int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations) + : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), + featureColumnName, weightsColumnName, groupIdColumnName, leafCount, + minimumDataPerLeaf, learningRate, numberOfIterations) { - Host.CheckNonEmpty(groupId, nameof(groupId)); + Host.CheckNonEmpty(groupIdColumnName, nameof(groupIdColumnName)); } private protected override void CheckDataValid(IChannel ch, RoleMappedData data) @@ -121,7 +122,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) if (!(labelType is KeyType || labelType == NumberDataViewType.Single)) { throw ch.ExceptParam(nameof(data), - $"Label column '{labelCol.Name}' is of type '{labelType}', but must be key or R4."); + $"Label column '{labelCol.Name}' is of type '{labelType.RawType}', but must be unsigned int or float."); } // Check group types. 
ch.CheckParam(data.Schema.Group.HasValue, nameof(data), "Need a group column."); @@ -130,7 +131,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) if (!(groupType == NumberDataViewType.UInt32 || groupType is KeyType)) { throw ch.ExceptParam(nameof(data), - $"Group column '{groupCol.Name}' is of type '{groupType}', but must be U4 or a Key."); + $"Group column '{groupCol.Name}' is of type '{groupType.RawType}', but must be unsigned int."); } } @@ -139,7 +140,7 @@ private protected override void CheckLabelCompatible(SchemaShape.Column labelCol Contracts.Assert(labelCol.IsValid); Action error = - () => throw Host.ExceptSchemaMismatch(nameof(labelCol), "label", labelCol.Name, "float or KeyType", labelCol.GetTypeString()); + () => throw Host.ExceptSchemaMismatch(nameof(labelCol), "label", labelCol.Name, "float or unsigned int", labelCol.GetTypeString()); if (labelCol.Kind != SchemaShape.Column.VectorKind.Scalar) error(); diff --git a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs index 3729154cbd..f93e2126c6 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs @@ -87,22 +87,22 @@ public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. - /// The name for the column containing the initial weight. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The name of the label column. + /// The name of the feature column. + /// The name for the column containing the initial weight. + /// The number of leaves to use. + /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// Number of iterations. 
internal LightGbmRegressorTrainer(IHostEnvironment env, - string labelColumn = DefaultColumnNames.Label, - string featureColumn = DefaultColumnNames.Features, - string weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, + string weightsColumnName = null, + int? numberOfLeaves = null, + int? minimumDataPerLeaf = null, double? learningRate = null, - int numBoostRound = LightGBM.Options.Defaults.NumBoostRound) - : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound) + int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations) + : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, weightsColumnName, null, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations) { } @@ -127,7 +127,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) if (!(labelType is BooleanDataViewType || labelType is KeyType || labelType == NumberDataViewType.Single)) { throw ch.ExceptParam(nameof(data), - $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4."); + $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType.RawType}', but must be an unsigned int, boolean or float."); } } diff --git a/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs b/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs index 614251c117..7ae79c09fc 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs @@ -58,27 +58,28 @@ private sealed class CategoricalMetaData private protected LightGbmTrainerBase(IHostEnvironment env, string name, - SchemaShape.Column label, - string featureColumn, - string weightColumn, - string groupIdColumn, - int? numLeaves, - int? 
minDataPerLeaf, + SchemaShape.Column labelColumn, + string featureColumnName, + string weightColumnName, + string groupIdColumnName, + int? leafCount, + int? minimumDataPerLeaf, double? learningRate, - int numBoostRound) - : base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(featureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(weightColumn), TrainerUtils.MakeU4ScalarColumn(groupIdColumn)) + int numberOfIterations) + : base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(featureColumnName), + labelColumn, TrainerUtils.MakeR4ScalarWeightColumn(weightColumnName), TrainerUtils.MakeU4ScalarColumn(groupIdColumnName)) { LightGbmTrainerOptions = new Options(); - LightGbmTrainerOptions.NumLeaves = numLeaves; - LightGbmTrainerOptions.MinDataPerLeaf = minDataPerLeaf; + LightGbmTrainerOptions.NumberOfLeaves = leafCount; + LightGbmTrainerOptions.MinimumDataPerLeaf = minimumDataPerLeaf; LightGbmTrainerOptions.LearningRate = learningRate; - LightGbmTrainerOptions.NumBoostRound = numBoostRound; + LightGbmTrainerOptions.NumberOfIterations = numberOfIterations; - LightGbmTrainerOptions.LabelColumnName = label.Name; - LightGbmTrainerOptions.FeatureColumnName = featureColumn; - LightGbmTrainerOptions.ExampleWeightColumnName = weightColumn; - LightGbmTrainerOptions.RowGroupColumnName = groupIdColumn; + LightGbmTrainerOptions.LabelColumnName = labelColumn.Name; + LightGbmTrainerOptions.FeatureColumnName = featureColumnName; + LightGbmTrainerOptions.ExampleWeightColumnName = weightColumnName; + LightGbmTrainerOptions.RowGroupColumnName = groupIdColumnName; InitParallelTraining(); } @@ -167,8 +168,8 @@ private protected virtual void CheckDataValid(IChannel ch, RoleMappedData data) protected virtual void GetDefaultParameters(IChannel ch, int numRow, bool hasCategarical, int totalCats, bool hiddenMsg = false) { double learningRate = LightGbmTrainerOptions.LearningRate ?? 
DefaultLearningRate(numRow, hasCategarical, totalCats); - int numLeaves = LightGbmTrainerOptions.NumLeaves ?? DefaultNumLeaves(numRow, hasCategarical, totalCats); - int minDataPerLeaf = LightGbmTrainerOptions.MinDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, 1); + int numLeaves = LightGbmTrainerOptions.NumberOfLeaves ?? DefaultNumLeaves(numRow, hasCategarical, totalCats); + int minDataPerLeaf = LightGbmTrainerOptions.MinimumDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, 1); Options["learning_rate"] = learningRate; Options["num_leaves"] = numLeaves; Options["min_data_per_leaf"] = minDataPerLeaf; @@ -176,10 +177,10 @@ protected virtual void GetDefaultParameters(IChannel ch, int numRow, bool hasCat { if (!LightGbmTrainerOptions.LearningRate.HasValue) ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.LearningRate) + " = " + learningRate); - if (!LightGbmTrainerOptions.NumLeaves.HasValue) - ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumLeaves) + " = " + numLeaves); - if (!LightGbmTrainerOptions.MinDataPerLeaf.HasValue) - ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinDataPerLeaf) + " = " + minDataPerLeaf); + if (!LightGbmTrainerOptions.NumberOfLeaves.HasValue) + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumberOfLeaves) + " = " + numLeaves); + if (!LightGbmTrainerOptions.MinimumDataPerLeaf.HasValue) + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinimumDataPerLeaf) + " = " + minDataPerLeaf); } } @@ -274,9 +275,9 @@ private CategoricalMetaData GetCategoricalMetaData(IChannel ch, RoleMappedData t int[] categoricalFeatures = null; const int useCatThreshold = 50000; // Disable cat when data is too small, reduce the overfitting. - bool useCat = LightGbmTrainerOptions.UseCat ?? 
numRow > useCatThreshold; - if (!LightGbmTrainerOptions.UseCat.HasValue) - ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseCat) + " = " + useCat); + bool useCat = LightGbmTrainerOptions.UseCategoricalSplit ?? numRow > useCatThreshold; + if (!LightGbmTrainerOptions.UseCategoricalSplit.HasValue) + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseCategoricalSplit) + " = " + useCat); if (useCat) { var featureCol = trainData.Schema.Schema[DefaultColumnNames.Features]; @@ -369,8 +370,8 @@ private void TrainCore(IChannel ch, IProgressChannel pch, Dataset dtrain, Catego { ch.Info("LightGBM objective={0}", Options["objective"]); using (Booster bst = WrappedLightGbmTraining.Train(ch, pch, Options, dtrain, - dvalid: dvalid, numIteration: LightGbmTrainerOptions.NumBoostRound, - verboseEval: LightGbmTrainerOptions.VerboseEval, earlyStoppingRound: LightGbmTrainerOptions.EarlyStoppingRound)) + dvalid: dvalid, numIteration: LightGbmTrainerOptions.NumberOfIterations, + verboseEval: LightGbmTrainerOptions.Verbose, earlyStoppingRound: LightGbmTrainerOptions.EarlyStoppingRound)) { TrainedEnsemble = bst.GetModel(catMetaData.CategoricalBoudaries); } diff --git a/src/Microsoft.ML.LightGBM/doc.xml b/src/Microsoft.ML.LightGBM/doc.xml index 1fcd38dd7a..dfa4ccfad7 100644 --- a/src/Microsoft.ML.LightGBM/doc.xml +++ b/src/Microsoft.ML.LightGBM/doc.xml @@ -16,10 +16,10 @@ new LightGbmBinaryClassifier { - NumBoostRound = 200, + NumberOfIterations = 200, LearningRate = 0.5f, - NumLeaves = 32, - MinDataPerLeaf = 20 + NumberOfLeaves = 32, + MinimumDataPerLeaf = 20 } @@ -29,10 +29,10 @@ new LightGbmClassifier { - NumBoostRound = 200, + NumberOfIterations = 200, LearningRate = 0.5f, - NumLeaves = 32, - MinDataPerLeaf = 20 + NumberOfLeaves = 32, + MinimumDataPerLeaf = 20 } @@ -42,10 +42,10 @@ new LightGbmRegressor { - NumBoostRound = 100, + NumberOfIterations = 100, LearningRate = 0.5f, - NumLeaves = 32, - MinDataPerLeaf = 20, + NumberOfLeaves = 32, + 
MinimumDataPerLeaf = 20, Booster = new DartBoosterParameterFunction { XgboostDartMode = true, @@ -60,10 +60,10 @@ new LightGbmRanker { - NumBoostRound = 100, + NumberOfIterations = 100, LearningRate = 0.5f, - NumLeaves = 32, - MinDataPerLeaf = 20, + NumberOfLeaves = 32, + MinimumDataPerLeaf = 20, Booster = new GbdtBoosterParameterFunction { MinSplitGain = 3, diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index c91fae0244..3fe119d98a 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -121,15 +121,15 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, Options options) /// /// The local instance of the /// The classification loss function. - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. + /// The name of the feature column. /// The learning rate. /// Whether to decrease learning rate as iterations progress. /// L2 Regularization Weight. /// The number of training iterations. 
internal AveragedPerceptronTrainer(IHostEnvironment env, - string labelColumn = DefaultColumnNames.Label, - string featureColumn = DefaultColumnNames.Features, + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, IClassificationLoss lossFunction = null, float learningRate = Options.AveragedDefault.LearningRate, bool decreaseLearningRate = Options.AveragedDefault.DecreaseLearningRate, @@ -137,8 +137,8 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, int numIterations = Options.AveragedDefault.NumIterations) : this(env, new Options { - LabelColumnName = labelColumn, - FeatureColumnName = featureColumn, + LabelColumnName = labelColumnName, + FeatureColumnName = featureColumnName, LearningRate = learningRate, DecreaseLearningRate = decreaseLearningRate, L2RegularizerWeight = l2RegularizerWeight, diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index d3e0180dbd..8654c4c032 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -11129,7 +11129,7 @@ "ShortName": "LightGBM", "Inputs": [ { - "Name": "NumBoostRound", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations.", "Aliases": [ @@ -11181,7 +11181,7 @@ } }, { - "Name": "NumLeaves", + "Name": "NumberOfLeaves", "Type": "Int", "Desc": "Maximum leaves for trees.", "Aliases": [ @@ -11200,7 +11200,7 @@ } }, { - "Name": "MinDataPerLeaf", + "Name": "MinimumDataPerLeaf", "Type": "Int", "Desc": "Minimum number of instances needed in a child.", "Aliases": [ @@ -11322,9 +11322,9 @@ "Default": "Auto" }, { - "Name": "MaxBin", + "Name": "MaximumBin", "Type": "Int", - "Desc": "Max number of bucket bin for features.", + "Desc": "Maximum number of bucket bin for features.", "Aliases": [ "mb" ], @@ -11334,7 +11334,7 @@ "Default": 255 }, { - "Name": "VerboseEval", + "Name": "Verbose", 
"Type": "Bool", "Desc": "Verbose", "Aliases": [ @@ -11355,7 +11355,7 @@ "Default": true }, { - "Name": "NThread", + "Name": "NumberOfThreads", "Type": "Int", "Desc": "Number of parallel threads used to run LightGBM.", "Aliases": [ @@ -11367,7 +11367,7 @@ "Default": null }, { - "Name": "EvalMetric", + "Name": "EvaluationMetric", "Type": { "Kind": "Enum", "Values": [ @@ -11393,7 +11393,7 @@ "Default": "DefaultMetric" }, { - "Name": "UseSoftmax", + "Name": "UseSoftMaximum", "Type": "Bool", "Desc": "Use softmax loss for the multi classification.", "Required": false, @@ -11454,7 +11454,7 @@ "Default": 1048576 }, { - "Name": "UseCat", + "Name": "UseCategoricalSplit", "Type": "Bool", "Desc": "Enable categorical split or not.", "Aliases": [ @@ -11475,7 +11475,7 @@ { "Name": "UseMissing", "Type": "Bool", - "Desc": "Enable missing value auto infer or not.", + "Desc": "Enable special handling of missing value or not.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -11489,9 +11489,9 @@ } }, { - "Name": "MinDataPerGroup", + "Name": "MinimumDataPerGroup", "Type": "Int", - "Desc": "Min number of instances per categorical group.", + "Desc": "Minimum number of instances per categorical group.", "Aliases": [ "mdpg" ], @@ -11514,7 +11514,7 @@ } }, { - "Name": "MaxCatThreshold", + "Name": "MaximumCategoricalThreshold", "Type": "Int", "Desc": "Max number of categorical thresholds.", "Aliases": [ @@ -11539,7 +11539,7 @@ } }, { - "Name": "CatSmooth", + "Name": "CategoricalSmoothing", "Type": "Float", "Desc": "Lapalace smooth term in categorical feature spilt. 
Avoid the bias of small categories.", "Required": false, @@ -11559,7 +11559,7 @@ } }, { - "Name": "CatL2", + "Name": "L2Categorical", "Type": "Float", "Desc": "L2 Regularization for categorical split.", "Required": false, @@ -11632,7 +11632,7 @@ "ShortName": "LightGBMMC", "Inputs": [ { - "Name": "NumBoostRound", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations.", "Aliases": [ @@ -11684,7 +11684,7 @@ } }, { - "Name": "NumLeaves", + "Name": "NumberOfLeaves", "Type": "Int", "Desc": "Maximum leaves for trees.", "Aliases": [ @@ -11703,7 +11703,7 @@ } }, { - "Name": "MinDataPerLeaf", + "Name": "MinimumDataPerLeaf", "Type": "Int", "Desc": "Minimum number of instances needed in a child.", "Aliases": [ @@ -11825,9 +11825,9 @@ "Default": "Auto" }, { - "Name": "MaxBin", + "Name": "MaximumBin", "Type": "Int", - "Desc": "Max number of bucket bin for features.", + "Desc": "Maximum number of bucket bin for features.", "Aliases": [ "mb" ], @@ -11837,7 +11837,7 @@ "Default": 255 }, { - "Name": "VerboseEval", + "Name": "Verbose", "Type": "Bool", "Desc": "Verbose", "Aliases": [ @@ -11858,7 +11858,7 @@ "Default": true }, { - "Name": "NThread", + "Name": "NumberOfThreads", "Type": "Int", "Desc": "Number of parallel threads used to run LightGBM.", "Aliases": [ @@ -11870,7 +11870,7 @@ "Default": null }, { - "Name": "EvalMetric", + "Name": "EvaluationMetric", "Type": { "Kind": "Enum", "Values": [ @@ -11896,7 +11896,7 @@ "Default": "DefaultMetric" }, { - "Name": "UseSoftmax", + "Name": "UseSoftMaximum", "Type": "Bool", "Desc": "Use softmax loss for the multi classification.", "Required": false, @@ -11957,7 +11957,7 @@ "Default": 1048576 }, { - "Name": "UseCat", + "Name": "UseCategoricalSplit", "Type": "Bool", "Desc": "Enable categorical split or not.", "Aliases": [ @@ -11978,7 +11978,7 @@ { "Name": "UseMissing", "Type": "Bool", - "Desc": "Enable missing value auto infer or not.", + "Desc": "Enable special handling of missing value or not.", "Required": false, 
"SortOrder": 150.0, "IsNullable": false, @@ -11992,9 +11992,9 @@ } }, { - "Name": "MinDataPerGroup", + "Name": "MinimumDataPerGroup", "Type": "Int", - "Desc": "Min number of instances per categorical group.", + "Desc": "Minimum number of instances per categorical group.", "Aliases": [ "mdpg" ], @@ -12017,7 +12017,7 @@ } }, { - "Name": "MaxCatThreshold", + "Name": "MaximumCategoricalThreshold", "Type": "Int", "Desc": "Max number of categorical thresholds.", "Aliases": [ @@ -12042,7 +12042,7 @@ } }, { - "Name": "CatSmooth", + "Name": "CategoricalSmoothing", "Type": "Float", "Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.", "Required": false, @@ -12062,7 +12062,7 @@ } }, { - "Name": "CatL2", + "Name": "L2Categorical", "Type": "Float", "Desc": "L2 Regularization for categorical split.", "Required": false, @@ -12135,7 +12135,7 @@ "ShortName": "LightGBMRank", "Inputs": [ { - "Name": "NumBoostRound", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations.", "Aliases": [ @@ -12187,7 +12187,7 @@ } }, { - "Name": "NumLeaves", + "Name": "NumberOfLeaves", "Type": "Int", "Desc": "Maximum leaves for trees.", "Aliases": [ @@ -12206,7 +12206,7 @@ } }, { - "Name": "MinDataPerLeaf", + "Name": "MinimumDataPerLeaf", "Type": "Int", "Desc": "Minimum number of instances needed in a child.", "Aliases": [ @@ -12328,9 +12328,9 @@ "Default": "Auto" }, { - "Name": "MaxBin", + "Name": "MaximumBin", "Type": "Int", - "Desc": "Max number of bucket bin for features.", + "Desc": "Maximum number of bucket bin for features.", "Aliases": [ "mb" ], @@ -12340,7 +12340,7 @@ "Default": 255 }, { - "Name": "VerboseEval", + "Name": "Verbose", "Type": "Bool", "Desc": "Verbose", "Aliases": [ @@ -12361,7 +12361,7 @@ "Default": true }, { - "Name": "NThread", + "Name": "NumberOfThreads", "Type": "Int", "Desc": "Number of parallel threads used to run LightGBM.", "Aliases": [ @@ -12373,7 +12373,7 @@ "Default": null }, { - "Name": "EvalMetric", + 
"Name": "EvaluationMetric", "Type": { "Kind": "Enum", "Values": [ @@ -12399,7 +12399,7 @@ "Default": "DefaultMetric" }, { - "Name": "UseSoftmax", + "Name": "UseSoftMaximum", "Type": "Bool", "Desc": "Use softmax loss for the multi classification.", "Required": false, @@ -12460,7 +12460,7 @@ "Default": 1048576 }, { - "Name": "UseCat", + "Name": "UseCategoricalSplit", "Type": "Bool", "Desc": "Enable categorical split or not.", "Aliases": [ @@ -12481,7 +12481,7 @@ { "Name": "UseMissing", "Type": "Bool", - "Desc": "Enable missing value auto infer or not.", + "Desc": "Enable special handling of missing value or not.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -12495,9 +12495,9 @@ } }, { - "Name": "MinDataPerGroup", + "Name": "MinimumDataPerGroup", "Type": "Int", - "Desc": "Min number of instances per categorical group.", + "Desc": "Minimum number of instances per categorical group.", "Aliases": [ "mdpg" ], @@ -12520,7 +12520,7 @@ } }, { - "Name": "MaxCatThreshold", + "Name": "MaximumCategoricalThreshold", "Type": "Int", "Desc": "Max number of categorical thresholds.", "Aliases": [ @@ -12545,7 +12545,7 @@ } }, { - "Name": "CatSmooth", + "Name": "CategoricalSmoothing", "Type": "Float", "Desc": "Lapalace smooth term in categorical feature spilt. 
Avoid the bias of small categories.", "Required": false, @@ -12565,7 +12565,7 @@ } }, { - "Name": "CatL2", + "Name": "L2Categorical", "Type": "Float", "Desc": "L2 Regularization for categorical split.", "Required": false, @@ -12638,7 +12638,7 @@ "ShortName": "LightGBMR", "Inputs": [ { - "Name": "NumBoostRound", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations.", "Aliases": [ @@ -12690,7 +12690,7 @@ } }, { - "Name": "NumLeaves", + "Name": "NumberOfLeaves", "Type": "Int", "Desc": "Maximum leaves for trees.", "Aliases": [ @@ -12709,7 +12709,7 @@ } }, { - "Name": "MinDataPerLeaf", + "Name": "MinimumDataPerLeaf", "Type": "Int", "Desc": "Minimum number of instances needed in a child.", "Aliases": [ @@ -12831,9 +12831,9 @@ "Default": "Auto" }, { - "Name": "MaxBin", + "Name": "MaximumBin", "Type": "Int", - "Desc": "Max number of bucket bin for features.", + "Desc": "Maximum number of bucket bin for features.", "Aliases": [ "mb" ], @@ -12843,7 +12843,7 @@ "Default": 255 }, { - "Name": "VerboseEval", + "Name": "Verbose", "Type": "Bool", "Desc": "Verbose", "Aliases": [ @@ -12864,7 +12864,7 @@ "Default": true }, { - "Name": "NThread", + "Name": "NumberOfThreads", "Type": "Int", "Desc": "Number of parallel threads used to run LightGBM.", "Aliases": [ @@ -12876,7 +12876,7 @@ "Default": null }, { - "Name": "EvalMetric", + "Name": "EvaluationMetric", "Type": { "Kind": "Enum", "Values": [ @@ -12902,7 +12902,7 @@ "Default": "DefaultMetric" }, { - "Name": "UseSoftmax", + "Name": "UseSoftMaximum", "Type": "Bool", "Desc": "Use softmax loss for the multi classification.", "Required": false, @@ -12963,7 +12963,7 @@ "Default": 1048576 }, { - "Name": "UseCat", + "Name": "UseCategoricalSplit", "Type": "Bool", "Desc": "Enable categorical split or not.", "Aliases": [ @@ -12984,7 +12984,7 @@ { "Name": "UseMissing", "Type": "Bool", - "Desc": "Enable missing value auto infer or not.", + "Desc": "Enable special handling of missing value or not.", "Required": false, 
"SortOrder": 150.0, "IsNullable": false, @@ -12998,9 +12998,9 @@ } }, { - "Name": "MinDataPerGroup", + "Name": "MinimumDataPerGroup", "Type": "Int", - "Desc": "Min number of instances per categorical group.", + "Desc": "Minimum number of instances per categorical group.", "Aliases": [ "mdpg" ], @@ -13023,7 +13023,7 @@ } }, { - "Name": "MaxCatThreshold", + "Name": "MaximumCategoricalThreshold", "Type": "Int", "Desc": "Max number of categorical thresholds.", "Aliases": [ @@ -13048,7 +13048,7 @@ } }, { - "Name": "CatSmooth", + "Name": "CategoricalSmoothing", "Type": "Float", "Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.", "Required": false, @@ -13068,7 +13068,7 @@ } }, { - "Name": "CatL2", + "Name": "L2Categorical", "Type": "Float", "Desc": "L2 Regularization for categorical split.", "Required": false, @@ -23538,7 +23538,7 @@ { "Name": "DropRate", "Type": "Float", - "Desc": "Drop ratio for trees. Range:(0,1).", + "Desc": "The drop ratio for trees. Range:(0,1).", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23551,7 +23551,7 @@ { "Name": "MaxDrop", "Type": "Int", - "Desc": "Max number of dropped tree in a boosting round.", + "Desc": "Maximum number of dropped tree in a boosting round.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23564,7 +23564,7 @@ { "Name": "SkipDrop", "Type": "Float", - "Desc": "Probability for not perform dropping in a boosting round.", + "Desc": "Probability for not dropping in a boosting round.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23595,7 +23595,7 @@ { "Name": "UnbalancedSets", "Type": "Bool", - "Desc": "Use for binary classification when classes are not balanced.", + "Desc": "Use for binary classification when training data is not balanced.", "Aliases": [ "us" ], @@ -23644,7 +23644,7 @@ { "Name": "SubsampleFreq", "Type": "Int", - "Desc": "Subsample frequency. 0 means no subsample. 
If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.", + "Desc": "Subsample frequency for bagging. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23748,7 +23748,7 @@ { "Name": "UnbalancedSets", "Type": "Bool", - "Desc": "Use for binary classification when classes are not balanced.", + "Desc": "Use for binary classification when training data is not balanced.", "Aliases": [ "us" ], @@ -23797,7 +23797,7 @@ { "Name": "SubsampleFreq", "Type": "Int", - "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.", + "Desc": "Subsample frequency for bagging. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23927,7 +23927,7 @@ { "Name": "UnbalancedSets", "Type": "Bool", - "Desc": "Use for binary classification when classes are not balanced.", + "Desc": "Use for binary classification when training data is not balanced.", "Aliases": [ "us" ], @@ -23976,7 +23976,7 @@ { "Name": "SubsampleFreq", "Type": "Int", - "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.", + "Desc": "Subsample frequency for bagging. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. 
And the subset will be updated on every Subsample iteratinos.", "Required": false, "SortOrder": 150.0, "IsNullable": false, diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt index 44635aa70a..e79b7a61e7 100644 --- a/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt +++ b/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt @@ -1,10 +1,10 @@ maml.exe CV tr=LightGBM{nt=1 iter=10 booster=dart lr=0.2 mil=10 nl=20} threads=- cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. TEST POSITIVE RATIO: 0.3702 (134.0/(134.0+228.0)) diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt index 232f6326d4..bfa0bf3f97 100644 --- a/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt +++ b/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBM{nt=1 iter=10 booster=dart lr=0.2 mil=10 nl=20} cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. 
TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0)) diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt index 8bd89002c1..a331a81b7e 100644 --- a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt +++ b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt @@ -1,10 +1,10 @@ maml.exe CV tr=LightGBM{nt=1 iter=10 v=+ booster=goss lr=0.2 mil=10 nl=20} threads=- cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. TEST POSITIVE RATIO: 0.3702 (134.0/(134.0+228.0)) diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt index ba50420a1d..d249f34e1b 100644 --- a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt +++ b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBM{nt=1 iter=10 v=+ booster=goss lr=0.2 mil=10 nl=20} cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. 
TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0)) diff --git a/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt b/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt index 2496917e04..391a8665ce 100644 --- a/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt +++ b/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBMBinary{nt=1 nl=5 mil=5 lr=0.25 iter=20 mb=255} cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0)) diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt index dcd0b08107..a9a5af5543 100644 --- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt +++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt @@ -1,12 +1,12 @@ maml.exe CV tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:TX:0 col=Features:1-*} data=%Data% seed=1 xf=Term{col=Label} Not adding a normalizer. -Auto-tuning parameters: UseCat = False -Auto-tuning parameters: UseSoftmax = False +Auto-tuning parameters: UseCategoricalSplit = False +Auto-tuning parameters: UseSoftMaximum = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. Not adding a normalizer. 
-Auto-tuning parameters: UseCat = False -Auto-tuning parameters: UseSoftmax = False +Auto-tuning parameters: UseCategoricalSplit = False +Auto-tuning parameters: UseSoftMaximum = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt index db69b4b0d8..9958fa93b8 100644 --- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt +++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt @@ -1,12 +1,12 @@ maml.exe CV tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:U4[0-2]:0 col=Features:1-4} data=%Data% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False -Auto-tuning parameters: UseSoftmax = False +Auto-tuning parameters: UseCategoricalSplit = False +Auto-tuning parameters: UseSoftMaximum = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. Not adding a normalizer. -Auto-tuning parameters: UseCat = False -Auto-tuning parameters: UseSoftmax = False +Auto-tuning parameters: UseCategoricalSplit = False +Auto-tuning parameters: UseSoftMaximum = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt index 1c4cb95912..a92727951e 100644 --- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt +++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt @@ -1,7 +1,7 @@ maml.exe TrainTest test=%Data% tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:TX:0 col=Features:1-*} data=%Data% out=%Output% seed=1 xf=Term{col=Label} Not adding a normalizer. 
-Auto-tuning parameters: UseCat = False -Auto-tuning parameters: UseSoftmax = False +Auto-tuning parameters: UseCategoricalSplit = False +Auto-tuning parameters: UseSoftMaximum = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt index 1de8c3d919..e0001f3d38 100644 --- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt +++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt @@ -1,7 +1,7 @@ maml.exe TrainTest test=%Data% tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:U4[0-2]:0 col=Features:1-4} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False -Auto-tuning parameters: UseSoftmax = False +Auto-tuning parameters: UseCategoricalSplit = False +Auto-tuning parameters: UseSoftMaximum = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt index afa867d488..1fc6084997 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt @@ -1,10 +1,10 @@ maml.exe CV tr=LightGBMR{nt=1 iter=50 v=+ booster=gbdt{l1=0.2 l2=0.2} lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. Not adding a normalizer. 
-Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. L1(avg): 27.477977 diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt index f15a4bb020..909d9f0012 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBMR{nt=1 iter=50 v=+ booster=gbdt{l1=0.2 l2=0.2} lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. L1(avg): 3.472291 diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt index c2530555e1..4550a80d3c 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt @@ -1,10 +1,10 @@ maml.exe CV tr=LightGBMR{nt=1 iter=50 em=mae v=+ lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. Not adding a normalizer. 
-Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. L1(avg): 27.482854 diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt index aaad5d20e5..59d2ceaa05 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBMR{nt=1 iter=50 em=mae v=+ lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. L1(avg): 3.428896 diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt index 483c724038..71d131bb5a 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt @@ -1,10 +1,10 @@ maml.exe CV tr=LightGBMR{nt=1 iter=50 em=rmse v=+ lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. Not adding a normalizer. 
-Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. L1(avg): 27.482854 diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt index 1ed592dd87..c919475347 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBMR{nt=1 iter=50 em=rmse v=+ lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. 
L1(avg): 3.428896 diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs index 731b013189..cc2257fe71 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs @@ -777,8 +777,8 @@ public void TestMultiClassEnsembleCombiner() LightGbm.TrainMultiClass(Env, new Options { FeatureColumnName = "Features", - NumBoostRound = 5, - NumLeaves = 4, + NumberOfIterations = 5, + NumberOfLeaves = 4, LabelColumnName = DefaultColumnNames.Label, TrainingData = dataView }).PredictorModel, diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs index 3daaa1f3c2..cfdd9851d6 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs @@ -576,8 +576,8 @@ public void LightGbmRegression() var est = reader.MakeNewEstimator() .Append(r => (r.label, score: catalog.Trainers.LightGbm(r.label, r.features, - numBoostRound: 10, - numLeaves: 5, + numberOfIterations: 10, + numberOfLeaves: 5, onFit: (p) => { pred = p; }))); var pipe = reader.Append(est); diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index 492a71a501..0983dbef3d 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -345,7 +345,7 @@ public void LightGbmBinaryClassificationOnnxConversionTest() var dynamicPipeline = mlContext.Transforms.Normalize("FeatureVector") .AppendCacheCheckpoint(mlContext) - .Append(mlContext.Regression.Trainers.LightGbm(labelColumnName: "Target", featureColumnName: "FeatureVector", numBoostRound: 3, numLeaves: 16, minDataPerLeaf: 100)); + .Append(mlContext.Regression.Trainers.LightGbm(labelColumnName: "Target", featureColumnName: "FeatureVector", numberOfIterations: 3, numberOfLeaves: 16, minimumDataPerLeaf: 100)); var model =
dynamicPipeline.Fit(data); // Step 2: Convert ML.NET model to ONNX format and save it as a file. diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index 8e6823739c..c495ae0428 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -685,8 +685,8 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? shuffleS LabelColumnName = "Label", FeatureColumnName = "Features", Seed = 1, - NThread = 1, - NumBoostRound = 1 + NumberOfThreads = 1, + NumberOfIterations = 1 })); var trainedModel = pipe.Fit(preprocessedTrainData); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs index 444db573da..db58372e6e 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs @@ -50,9 +50,9 @@ public void LightGBMBinaryEstimator() var trainer = ML.BinaryClassification.Trainers.LightGbm(new Options { - NumLeaves = 10, - NThread = 1, - MinDataPerLeaf = 2, + NumberOfLeaves = 10, + NumberOfThreads = 1, + MinimumDataPerLeaf = 2, }); var pipeWithTrainer = pipe.Append(trainer); @@ -169,9 +169,9 @@ public void LightGBMRegressorEstimator() var dataView = GetRegressionPipeline(); var trainer = ML.Regression.Trainers.LightGbm(new Options { - NThread = 1, + NumberOfThreads = 1, NormalizeFeatures = NormalizeOption.Warn, - CatL2 = 5, + L2Categorical = 5, }); TestEstimatorCore(trainer, dataView); @@ -295,10 +295,10 @@ private void LightGbmHelper(bool useSoftmax, out string modelString, out List