Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scrub changes for LightGBM #2808

Merged
merged 9 commits into from
Mar 5, 2019
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public static void Example()
FeatureColumnName = "Features",
Booster = new DartBooster.Options
{
DropRate = 0.15,
TreeDropFraction = 0.15,
XgboostDartMode = false
}
}))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ public static void Example()

// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
var pipeline = mlContext.Ranking.Trainers.LightGbm(
numLeaves: 4,
minDataPerLeaf: 10,
numberOfLeaves: 4,
minimumExampleCountPerLeaf: 10,
learningRate: 0.1,
numBoostRound: 2);
numberOfIterations: 2);

// Fit this Pipeline to the Training Data.
var model = pipeline.Fit(split.TrainSet);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ public static void Example()
var pipeline = mlContext.Ranking.Trainers.LightGbm(
new Options
{
NumLeaves = 4,
MinDataPerLeaf = 10,
NumberOfLeaves = 4,
MinimumExampleCountPerGroup = 10,
LearningRate = 0.1,
NumBoostRound = 2,
NumberOfIterations = 2,
Booster = new TreeBooster.Options
{
FeatureFraction = 0.9
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ public static void Example()
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Regression.Trainers.LightGbm(
labelColumnName: labelName,
numLeaves: 4,
minDataPerLeaf: 6,
numberOfLeaves: 4,
minimumExampleCountPerLeaf: 6,
learningRate: 0.001));

// Fit this pipeline to the training data.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ public static void Example()
.Append(mlContext.Regression.Trainers.LightGbm(new Options
{
LabelColumnName = labelName,
NumLeaves = 4,
MinDataPerLeaf = 6,
NumberOfLeaves = 4,
MinimumExampleCountPerLeaf = 6,
LearningRate = 0.001,
Booster = new GossBooster.Options
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ public static void LightGbmBinaryClassification()
Score: mlContext.BinaryClassification.Trainers.LightGbm(
row.Label,
row.Features,
numLeaves: 4,
minDataPerLeaf: 6,
numberOfLeaves: 4,
minimumExampleCountPerLeaf: 6,
learningRate: 0.001)))
.Append(row => (
Label: row.Label,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ public static void LightGbmRegression()
.Append(r => (r.label, score: mlContext.Regression.Trainers.LightGbm(
r.label,
r.features,
numLeaves: 4,
minDataPerLeaf: 6,
numberOfLeaves: 4,
minimumDataPerLeaf: 6,
Copy link
Member

@wschin wschin Mar 1, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change:
```diff
- minimumDataPerLeaf: 6,
+ minimumExampleCountPerLeaf: 6,
```
#Resolved

learningRate: 0.001,
onFit: p => pred = p)
)
Expand Down
89 changes: 47 additions & 42 deletions src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ public static class LightGbmStaticExtensions
/// <param name="label">The label column.</param>
/// <param name="features">The features column.</param>
/// <param name="weights">The weights column.</param>
/// <param name="numLeaves">The number of leaves to use.</param>
/// <param name="numBoostRound">Number of iterations.</param>
/// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="numberOfLeaves">The number of leaves to use.</param>
/// <param name="minimumDataPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
Copy link
Member

@wschin wschin Mar 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`minimumDataPerLeaf` (start = 25, length = 18)

Should this be `minimumExampleCountPerLeaf`? #Resolved

/// <param name="learningRate">The learning rate.</param>
/// <param name="numberOfIterations">Number of iterations.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
Expand All @@ -39,19 +39,19 @@ public static class LightGbmStaticExtensions
/// </example>
public static Scalar<float> LightGbm(this RegressionCatalog.RegressionTrainers catalog,
Scalar<float> label, Vector<float> features, Scalar<float> weights = null,
int? numLeaves = null,
int? minDataPerLeaf = null,
int? numberOfLeaves = null,
int? minimumDataPerLeaf = null,
Copy link
Member

@wschin wschin Mar 1, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change:
```diff
- int? minimumDataPerLeaf = null,
+ int? minimumExampleCountPerLeaf = null,
```
#Resolved

double? learningRate = null,
int numBoostRound = Options.Defaults.NumBoostRound,
int numberOfIterations = Options.Defaults.NumberOfIterations,
Action<LightGbmRegressionModelParameters> onFit = null)
{
CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit);

var rec = new TrainerEstimatorReconciler.Regression(
(env, labelName, featuresName, weightsName) =>
{
var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numLeaves,
minDataPerLeaf, learningRate, numBoostRound);
var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numberOfLeaves,
minimumDataPerLeaf, learningRate, numberOfIterations);
if (onFit != null)
return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
return trainer;
Expand Down Expand Up @@ -104,10 +104,10 @@ public static Scalar<float> LightGbm(this RegressionCatalog.RegressionTrainers c
/// <param name="label">The label column.</param>
/// <param name="features">The features column.</param>
/// <param name="weights">The weights column.</param>
/// <param name="numLeaves">The number of leaves to use.</param>
/// <param name="numBoostRound">Number of iterations.</param>
/// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="numberOfLeaves">The number of leaves to use.</param>
/// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="numberOfIterations">Number of iterations.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
Expand All @@ -122,20 +122,22 @@ public static Scalar<float> LightGbm(this RegressionCatalog.RegressionTrainers c
/// ]]></format>
/// </example>
public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel) LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
Scalar<bool> label, Vector<float> features, Scalar<float> weights = null,
int? numLeaves = null,
int? minDataPerLeaf = null,
Scalar<bool> label,
Vector<float> features,
Scalar<float> weights = null,
int? numberOfLeaves = null,
int? minimumExampleCountPerLeaf = null,
double? learningRate = null,
int numBoostRound = Options.Defaults.NumBoostRound,
int numberOfIterations = Options.Defaults.NumberOfIterations,
Action<CalibratedModelParametersBase<LightGbmBinaryModelParameters, PlattCalibrator>> onFit = null)
{
CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
CheckUserValues(label, features, weights, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations, onFit);

var rec = new TrainerEstimatorReconciler.BinaryClassifier(
(env, labelName, featuresName, weightsName) =>
{
var trainer = new LightGbmBinaryTrainer(env, labelName, featuresName, weightsName, numLeaves,
minDataPerLeaf, learningRate, numBoostRound);
var trainer = new LightGbmBinaryTrainer(env, labelName, featuresName, weightsName, numberOfLeaves,
minimumExampleCountPerLeaf, learningRate, numberOfIterations);

if (onFit != null)
return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
Expand Down Expand Up @@ -194,10 +196,10 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
/// <param name="features">The features column.</param>
/// <param name="groupId">The groupId column.</param>
/// <param name="weights">The weights column.</param>
/// <param name="numLeaves">The number of leaves to use.</param>
/// <param name="numBoostRound">Number of iterations.</param>
/// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="numberOfLeaves">The number of leaves to use.</param>
/// <param name="minimumDataPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
Copy link
Member

@wschin wschin Mar 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`minimumDataPerLeaf` (start = 25, length = 18)

Rename to `minimumExampleCountPerLeaf`. #Resolved

/// <param name="learningRate">The learning rate.</param>
/// <param name="numberOfIterations">Number of iterations.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
Expand All @@ -206,21 +208,24 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
/// <returns>The set of output columns including in order the predicted binary classification score (which will range
/// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
public static Scalar<float> LightGbm<TVal>(this RankingCatalog.RankingTrainers catalog,
Scalar<float> label, Vector<float> features, Key<uint, TVal> groupId, Scalar<float> weights = null,
int? numLeaves = null,
int? minDataPerLeaf = null,
Scalar<float> label,
Vector<float> features,
Key<uint, TVal> groupId,
Scalar<float> weights = null,
int? numberOfLeaves = null,
int? minimumDataPerLeaf = null,
Copy link
Member

@wschin wschin Mar 1, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change:
```diff
- int? minimumDataPerLeaf = null,
+ int? minimumExampleCountPerLeaf = null,
```
#Resolved

double? learningRate = null,
int numBoostRound = Options.Defaults.NumBoostRound,
int numberOfIterations = Options.Defaults.NumberOfIterations,
Action<LightGbmRankingModelParameters> onFit = null)
{
CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit);
Contracts.CheckValue(groupId, nameof(groupId));

var rec = new TrainerEstimatorReconciler.Ranker<TVal>(
(env, labelName, featuresName, groupIdName, weightsName) =>
{
var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numLeaves,
minDataPerLeaf, learningRate, numBoostRound);
var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numberOfLeaves,
minimumDataPerLeaf, learningRate, numberOfIterations);

if (onFit != null)
return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
Expand Down Expand Up @@ -279,10 +284,10 @@ public static Scalar<float> LightGbm<TVal>(this RankingCatalog.RankingTrainers c
/// <param name="label">The label, or dependent variable.</param>
/// <param name="features">The features, or independent variables.</param>
/// <param name="weights">The weights column.</param>
/// <param name="numLeaves">The number of leaves to use.</param>
/// <param name="numBoostRound">Number of iterations.</param>
/// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="numberOfLeaves">The number of leaves to use.</param>
/// <param name="minimumDataPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
Copy link
Member

@wschin wschin Mar 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`minimumDataPerLeaf` (start = 25, length = 18)

Rename to `minimumExampleCountPerLeaf`. #Resolved

/// <param name="learningRate">The learning rate.</param>
/// <param name="numberOfIterations">Number of iterations.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
Expand All @@ -301,19 +306,19 @@ public static (Vector<float> score, Key<uint, TVal> predictedLabel)
Key<uint, TVal> label,
Vector<float> features,
Scalar<float> weights = null,
int? numLeaves = null,
int? minDataPerLeaf = null,
int? numberOfLeaves = null,
int? minimumDataPerLeaf = null,
Copy link
Member

@wschin wschin Mar 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`minimumDataPerLeaf` (start = 17, length = 18)

Rename to `minimumExampleCountPerLeaf`. #Resolved

double? learningRate = null,
int numBoostRound = Options.Defaults.NumBoostRound,
int numberOfIterations = Options.Defaults.NumberOfIterations,
Action<OneVersusAllModelParameters> onFit = null)
{
CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit);

var rec = new TrainerEstimatorReconciler.MulticlassClassifier<TVal>(
(env, labelName, featuresName, weightsName) =>
{
var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numLeaves,
minDataPerLeaf, learningRate, numBoostRound);
var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numberOfLeaves,
minimumDataPerLeaf, learningRate, numberOfIterations);

if (onFit != null)
return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
Expand Down Expand Up @@ -365,17 +370,17 @@ public static (Vector<float> score, Key<uint, TVal> predictedLabel)
}

private static void CheckUserValues(PipelineColumn label, Vector<float> features, Scalar<float> weights,
int? numLeaves,
int? minDataPerLeaf,
int? numberOfLeaves,
int? minimumExampleCountPerLeaf,
double? learningRate,
int numBoostRound,
Delegate onFit)
{
Contracts.CheckValue(label, nameof(label));
Contracts.CheckValue(features, nameof(features));
Contracts.CheckValueOrNull(weights);
Contracts.CheckParam(!(numLeaves < 2), nameof(numLeaves), "Must be at least 2.");
Contracts.CheckParam(!(minDataPerLeaf <= 0), nameof(minDataPerLeaf), "Must be positive");
Contracts.CheckParam(!(numberOfLeaves < 2), nameof(numberOfLeaves), "Must be at least 2.");
Contracts.CheckParam(!(minimumExampleCountPerLeaf <= 0), nameof(minimumExampleCountPerLeaf), "Must be positive");
Contracts.CheckParam(!(learningRate <= 0), nameof(learningRate), "Must be positive");
Contracts.CheckParam(numBoostRound > 0, nameof(numBoostRound), "Must be positive");
Contracts.CheckValueOrNull(onFit);
Expand Down
Loading