-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Scrub changes for LightGBM #2808
Changes from 5 commits
8dd53e1
23812b5
ce21121
b958c37
90e3ee2
d32e44e
27172f1
a355b31
4a1fbb0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -21,10 +21,10 @@ public static class LightGbmStaticExtensions | |||||||
/// <param name="label">The label column.</param> | ||||||||
/// <param name="features">The features column.</param> | ||||||||
/// <param name="weights">The weights column.</param> | ||||||||
/// <param name="numLeaves">The number of leaves to use.</param> | ||||||||
/// <param name="numBoostRound">Number of iterations.</param> | ||||||||
/// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param> | ||||||||
/// <param name="numberOfLeaves">The number of leaves to use.</param> | ||||||||
/// <param name="minimumDataPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param> | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
minimumExampleCountPerLeaf? #Resolved |
||||||||
/// <param name="learningRate">The learning rate.</param> | ||||||||
/// <param name="numberOfIterations">Number of iterations.</param> | ||||||||
/// <param name="onFit">A delegate that is called every time the | ||||||||
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the | ||||||||
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive | ||||||||
|
@@ -39,19 +39,19 @@ public static class LightGbmStaticExtensions | |||||||
/// </example> | ||||||||
public static Scalar<float> LightGbm(this RegressionCatalog.RegressionTrainers catalog, | ||||||||
Scalar<float> label, Vector<float> features, Scalar<float> weights = null, | ||||||||
int? numLeaves = null, | ||||||||
int? minDataPerLeaf = null, | ||||||||
int? numberOfLeaves = null, | ||||||||
int? minimumDataPerLeaf = null, | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
double? learningRate = null, | ||||||||
int numBoostRound = Options.Defaults.NumBoostRound, | ||||||||
int numberOfIterations = Options.Defaults.NumberOfIterations, | ||||||||
Action<LightGbmRegressionModelParameters> onFit = null) | ||||||||
{ | ||||||||
CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); | ||||||||
CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit); | ||||||||
|
||||||||
var rec = new TrainerEstimatorReconciler.Regression( | ||||||||
(env, labelName, featuresName, weightsName) => | ||||||||
{ | ||||||||
var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numLeaves, | ||||||||
minDataPerLeaf, learningRate, numBoostRound); | ||||||||
var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numberOfLeaves, | ||||||||
minimumDataPerLeaf, learningRate, numberOfIterations); | ||||||||
if (onFit != null) | ||||||||
return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); | ||||||||
return trainer; | ||||||||
|
@@ -104,10 +104,10 @@ public static Scalar<float> LightGbm(this RegressionCatalog.RegressionTrainers c | |||||||
/// <param name="label">The label column.</param> | ||||||||
/// <param name="features">The features column.</param> | ||||||||
/// <param name="weights">The weights column.</param> | ||||||||
/// <param name="numLeaves">The number of leaves to use.</param> | ||||||||
/// <param name="numBoostRound">Number of iterations.</param> | ||||||||
/// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param> | ||||||||
/// <param name="numberOfLeaves">The number of leaves to use.</param> | ||||||||
/// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param> | ||||||||
/// <param name="learningRate">The learning rate.</param> | ||||||||
/// <param name="numberOfIterations">Number of iterations.</param> | ||||||||
/// <param name="onFit">A delegate that is called every time the | ||||||||
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the | ||||||||
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive | ||||||||
|
@@ -122,20 +122,22 @@ public static Scalar<float> LightGbm(this RegressionCatalog.RegressionTrainers c | |||||||
/// ]]></format> | ||||||||
/// </example> | ||||||||
public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel) LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, | ||||||||
Scalar<bool> label, Vector<float> features, Scalar<float> weights = null, | ||||||||
int? numLeaves = null, | ||||||||
int? minDataPerLeaf = null, | ||||||||
Scalar<bool> label, | ||||||||
Vector<float> features, | ||||||||
Scalar<float> weights = null, | ||||||||
int? numberOfLeaves = null, | ||||||||
int? minimumExampleCountPerLeaf = null, | ||||||||
double? learningRate = null, | ||||||||
int numBoostRound = Options.Defaults.NumBoostRound, | ||||||||
int numberOfIterations = Options.Defaults.NumberOfIterations, | ||||||||
Action<CalibratedModelParametersBase<LightGbmBinaryModelParameters, PlattCalibrator>> onFit = null) | ||||||||
{ | ||||||||
CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); | ||||||||
CheckUserValues(label, features, weights, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations, onFit); | ||||||||
|
||||||||
var rec = new TrainerEstimatorReconciler.BinaryClassifier( | ||||||||
(env, labelName, featuresName, weightsName) => | ||||||||
{ | ||||||||
var trainer = new LightGbmBinaryTrainer(env, labelName, featuresName, weightsName, numLeaves, | ||||||||
minDataPerLeaf, learningRate, numBoostRound); | ||||||||
var trainer = new LightGbmBinaryTrainer(env, labelName, featuresName, weightsName, numberOfLeaves, | ||||||||
minimumExampleCountPerLeaf, learningRate, numberOfIterations); | ||||||||
|
||||||||
if (onFit != null) | ||||||||
return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); | ||||||||
|
@@ -194,10 +196,10 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred | |||||||
/// <param name="features">The features column.</param> | ||||||||
/// <param name="groupId">The groupId column.</param> | ||||||||
/// <param name="weights">The weights column.</param> | ||||||||
/// <param name="numLeaves">The number of leaves to use.</param> | ||||||||
/// <param name="numBoostRound">Number of iterations.</param> | ||||||||
/// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param> | ||||||||
/// <param name="numberOfLeaves">The number of leaves to use.</param> | ||||||||
/// <param name="minimumDataPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param> | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
minimumExampleCountPerLeaf #Resolved |
||||||||
/// <param name="learningRate">The learning rate.</param> | ||||||||
/// <param name="numberOfIterations">Number of iterations.</param> | ||||||||
/// <param name="onFit">A delegate that is called every time the | ||||||||
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the | ||||||||
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive | ||||||||
|
@@ -206,21 +208,24 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred | |||||||
/// <returns>The set of output columns including in order the predicted binary classification score (which will range | ||||||||
/// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns> | ||||||||
public static Scalar<float> LightGbm<TVal>(this RankingCatalog.RankingTrainers catalog, | ||||||||
Scalar<float> label, Vector<float> features, Key<uint, TVal> groupId, Scalar<float> weights = null, | ||||||||
int? numLeaves = null, | ||||||||
int? minDataPerLeaf = null, | ||||||||
Scalar<float> label, | ||||||||
Vector<float> features, | ||||||||
Key<uint, TVal> groupId, | ||||||||
Scalar<float> weights = null, | ||||||||
int? numberOfLeaves = null, | ||||||||
int? minimumDataPerLeaf = null, | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
double? learningRate = null, | ||||||||
int numBoostRound = Options.Defaults.NumBoostRound, | ||||||||
int numberOfIterations = Options.Defaults.NumberOfIterations, | ||||||||
Action<LightGbmRankingModelParameters> onFit = null) | ||||||||
{ | ||||||||
CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); | ||||||||
CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit); | ||||||||
Contracts.CheckValue(groupId, nameof(groupId)); | ||||||||
|
||||||||
var rec = new TrainerEstimatorReconciler.Ranker<TVal>( | ||||||||
(env, labelName, featuresName, groupIdName, weightsName) => | ||||||||
{ | ||||||||
var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numLeaves, | ||||||||
minDataPerLeaf, learningRate, numBoostRound); | ||||||||
var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numberOfLeaves, | ||||||||
minimumDataPerLeaf, learningRate, numberOfIterations); | ||||||||
|
||||||||
if (onFit != null) | ||||||||
return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); | ||||||||
|
@@ -279,10 +284,10 @@ public static Scalar<float> LightGbm<TVal>(this RankingCatalog.RankingTrainers c | |||||||
/// <param name="label">The label, or dependent variable.</param> | ||||||||
/// <param name="features">The features, or independent variables.</param> | ||||||||
/// <param name="weights">The weights column.</param> | ||||||||
/// <param name="numLeaves">The number of leaves to use.</param> | ||||||||
/// <param name="numBoostRound">Number of iterations.</param> | ||||||||
/// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param> | ||||||||
/// <param name="numberOfLeaves">The number of leaves to use.</param> | ||||||||
/// <param name="minimumDataPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param> | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
minimumExampleCountPerLeaf #Resolved |
||||||||
/// <param name="learningRate">The learning rate.</param> | ||||||||
/// <param name="numberOfIterations">Number of iterations.</param> | ||||||||
/// <param name="onFit">A delegate that is called every time the | ||||||||
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the | ||||||||
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive | ||||||||
|
@@ -301,19 +306,19 @@ public static (Vector<float> score, Key<uint, TVal> predictedLabel) | |||||||
Key<uint, TVal> label, | ||||||||
Vector<float> features, | ||||||||
Scalar<float> weights = null, | ||||||||
int? numLeaves = null, | ||||||||
int? minDataPerLeaf = null, | ||||||||
int? numberOfLeaves = null, | ||||||||
int? minimumDataPerLeaf = null, | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
minimumExampleCountPerLeaf #Resolved |
||||||||
double? learningRate = null, | ||||||||
int numBoostRound = Options.Defaults.NumBoostRound, | ||||||||
int numberOfIterations = Options.Defaults.NumberOfIterations, | ||||||||
Action<OneVersusAllModelParameters> onFit = null) | ||||||||
{ | ||||||||
CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); | ||||||||
CheckUserValues(label, features, weights, numberOfLeaves, minimumDataPerLeaf, learningRate, numberOfIterations, onFit); | ||||||||
|
||||||||
var rec = new TrainerEstimatorReconciler.MulticlassClassifier<TVal>( | ||||||||
(env, labelName, featuresName, weightsName) => | ||||||||
{ | ||||||||
var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numLeaves, | ||||||||
minDataPerLeaf, learningRate, numBoostRound); | ||||||||
var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numberOfLeaves, | ||||||||
minimumDataPerLeaf, learningRate, numberOfIterations); | ||||||||
|
||||||||
if (onFit != null) | ||||||||
return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); | ||||||||
|
@@ -365,17 +370,17 @@ public static (Vector<float> score, Key<uint, TVal> predictedLabel) | |||||||
} | ||||||||
|
||||||||
private static void CheckUserValues(PipelineColumn label, Vector<float> features, Scalar<float> weights, | ||||||||
int? numLeaves, | ||||||||
int? minDataPerLeaf, | ||||||||
int? numberOfLeaves, | ||||||||
int? minimumExampleCountPerLeaf, | ||||||||
double? learningRate, | ||||||||
int numBoostRound, | ||||||||
Delegate onFit) | ||||||||
{ | ||||||||
Contracts.CheckValue(label, nameof(label)); | ||||||||
Contracts.CheckValue(features, nameof(features)); | ||||||||
Contracts.CheckValueOrNull(weights); | ||||||||
Contracts.CheckParam(!(numLeaves < 2), nameof(numLeaves), "Must be at least 2."); | ||||||||
Contracts.CheckParam(!(minDataPerLeaf <= 0), nameof(minDataPerLeaf), "Must be positive"); | ||||||||
Contracts.CheckParam(!(numberOfLeaves < 2), nameof(numberOfLeaves), "Must be at least 2."); | ||||||||
Contracts.CheckParam(!(minimumExampleCountPerLeaf <= 0), nameof(minimumExampleCountPerLeaf), "Must be positive"); | ||||||||
Contracts.CheckParam(!(learningRate <= 0), nameof(learningRate), "Must be positive"); | ||||||||
Contracts.CheckParam(numBoostRound > 0, nameof(numBoostRound), "Must be positive"); | ||||||||
Contracts.CheckValueOrNull(onFit); | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.