Commit 90e3ee2

Minor updates

singlis committed Mar 2, 2019
1 parent b958c37 commit 90e3ee2
Showing 7 changed files with 34 additions and 70 deletions.
23 changes: 12 additions & 11 deletions src/Microsoft.ML.LightGBM/LightGbmArguments.cs
@@ -97,9 +97,9 @@ private static string GetOptionName(string name)
return strBuf.ToString();
}

// Static name map that maps friendly names to lightGBM arguments.
// There is a conversion that will convert the field name to a lightGBM name
// (but lowercasing and adding an underscore between words). In
// Static override name map that maps friendly names to lightGBM arguments.
// If an argument is not here, then its name is identical to a lightGBM argument
// and does not require a mapping, for example, Subsample.
private static Dictionary<string, string> _nameMapping = new Dictionary<string, string>()
{
{nameof(TreeBooster.Options.MinimumSplitGain), "min_split_gain" },
@@ -110,7 +110,7 @@ private static string GetOptionName(string name)
{nameof(TreeBooster.Options.L2Regularization), "reg_lambda"},
{nameof(TreeBooster.Options.WeightOfPositiveExamples), "scale_pos_weight"},
{nameof(DartBooster.Options.TreeDropFraction), "drop_rate" },
{nameof(DartBooster.Options.MaximumDroppedTreesPerRound), "max_drop" },
{nameof(DartBooster.Options.MaximumDroppedTreeCountPerRound), "max_drop" },
{nameof(DartBooster.Options.SkipDropFraction), "skip_drop" },
{nameof(MinimumExampleCountPerLeaf), "min_data_per_leaf"},
{nameof(NumberOfLeaves), "num_leaves"},
@@ -159,7 +159,8 @@ public class Options : ISupportBoosterParameterFactory

[Argument(ArgumentType.AtMostOnce,
HelpText = "Subsample frequency for bagging. 0 means no subsample. "
+ "If subsampleFreq > 0, it will use a subset to train and the subset will be updated on every Subsample iteration.")]
+ "Specifies the frequency at which the bagging occurs, where if this is set to N, the subsampling will happen at N iterations." +
"This must be set with Subsample as this specifies the amount to subsample.")]
[TlcModule.Range(Min = 0, Max = int.MaxValue)]
public int SubsampleFrequency = 0;
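
As an aside, the pairing described in this help text could look like the following hypothetical sketch (not part of this commit; it assumes TreeBooster.Options is constructed directly and that Subsample keeps its LightGBM-style name, as the comment in LightGbmArguments.cs suggests):

// Illustrative sketch only: pair SubsampleFrequency with Subsample as the help text describes.
var booster = new TreeBooster.Options
{
    Subsample = 0.8,          // train each bagging subset on 80% of the data
    SubsampleFrequency = 5    // re-draw that subset every 5 boosting iterations
};
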

@@ -177,15 +178,15 @@ public class Options : ISupportBoosterParameterFactory

[Argument(ArgumentType.AtMostOnce,
HelpText = "L2 regularization term on weights, increasing this value will make model more conservative.",
ShortName = "l2,RegLambda")]
ShortName = "l2")]
[TlcModule.Range(Min = 0.0)]
[TGUI(Label = "Lambda(L2)", SuggestedSweeps = "0,0.5,1")]
[TlcModule.SweepableDiscreteParam("RegLambda", new object[] { 0f, 0.5f, 1f })]
public double L2Regularization = 0.01;

[Argument(ArgumentType.AtMostOnce,
HelpText = "L1 regularization term on weights, increase this value will make model more conservative.",
ShortName = "l1,RegAlpha")]
ShortName = "l1")]
[TlcModule.Range(Min = 0.0)]
[TGUI(Label = "Alpha(L1)", SuggestedSweeps = "0,0.5,1")]
[TlcModule.SweepableDiscreteParam("RegAlpha", new object[] { 0f, 0.5f, 1f })]
@@ -235,7 +236,7 @@ public sealed class Options : TreeBooster.Options

[Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of dropped tree in a boosting round.")]
[TlcModule.Range(Inf = 0, Max = int.MaxValue)]
public int MaximumDroppedTreesPerRound = 1;
public int MaximumDroppedTreeCountPerRound = 1;

[Argument(ArgumentType.AtMostOnce, HelpText = "Probability for not dropping in a boosting round.")]
[TlcModule.Range(Inf = 0.0, Max = 1.0)]
@@ -358,7 +359,7 @@ public enum EvalMetricType

[Argument(ArgumentType.AtMostOnce, HelpText = "Use softmax loss for the multi classification.")]
[TlcModule.SweepableDiscreteParam("UseSoftmax", new object[] { true, false })]
public bool? UseSoftMax;
public bool? UseSoftmax;

[Argument(ArgumentType.AtMostOnce, HelpText = "Rounds of early stopping, 0 will disable it.",
ShortName = "es")]
@@ -382,7 +383,7 @@ public enum EvalMetricType

[Argument(ArgumentType.AtMostOnce, HelpText = "Enable special handling of missing value or not.")]
[TlcModule.SweepableDiscreteParam("UseMissing", new object[] { true, false })]
public bool UseMissing = false;
public bool HandleMissingValue = false;

[Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of instances per categorical group.", ShortName = "mdpg")]
[TlcModule.Range(Inf = 0, Max = int.MaxValue)]
@@ -459,7 +460,7 @@ internal Dictionary<string, object> ToDictionary(IHost host)
res[GetOptionName(nameof(metric))] = metric;
res[GetOptionName(nameof(Sigmoid))] = Sigmoid;
res[GetOptionName(nameof(CustomGains))] = CustomGains;
res[GetOptionName(nameof(UseMissing))] = UseMissing;
res[GetOptionName(nameof(HandleMissingValue))] = HandleMissingValue;
res[GetOptionName(nameof(MinimumExampleCountPerGroup))] = MinimumExampleCountPerGroup;
res[GetOptionName(nameof(MaximumCategoricalSplitPointCount))] = MaximumCategoricalSplitPointCount;
res[GetOptionName(nameof(CategoricalSmoothing))] = CategoricalSmoothing;
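A minimal sketch of the name resolution that the rewritten comment in LightGbmArguments.cs describes: overrides in _nameMapping win, and any other field name is lowercased with underscores inserted between words. The helper name and exact layout below are hypothetical, not the actual GetOptionName implementation in this commit.

// Hypothetical helper illustrating the naming convention described above.
private static string ResolveOptionName(string fieldName)
{
    // Explicit overrides win, e.g. "L2Regularization" -> "reg_lambda".
    if (_nameMapping.TryGetValue(fieldName, out var mapped))
        return mapped;

    // Otherwise lowercase and insert an underscore before each interior capital,
    // e.g. "Subsample" -> "subsample", "NumberOfIterations" -> "number_of_iterations".
    var strBuf = new System.Text.StringBuilder();
    for (int i = 0; i < fieldName.Length; i++)
    {
        if (char.IsUpper(fieldName[i]) && i > 0)
            strBuf.Append('_');
        strBuf.Append(char.ToLowerInvariant(fieldName[i]));
    }
    return strBuf.ToString();
}
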
6 changes: 3 additions & 3 deletions src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs
@@ -109,20 +109,20 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, Options options)
/// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
/// <param name="labelColumnName">The name of The label column.</param>
/// <param name="featureColumnName">The name of the feature column.</param>
/// <param name="weights">The name for the column containing the initial weight.</param>
/// <param name="exampleWeightColumnName">The name for the column containing the initial weight.</param>
/// <param name="numberOfLeaves">The number of leaves to use.</param>
/// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="numberOfIterations">Number of iterations.</param>
internal LightGbmBinaryTrainer(IHostEnvironment env,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
string weights = null,
string exampleWeightColumnName = null,
int? numberOfLeaves = null,
int? minimumExampleCountPerLeaf = null,
double? learningRate = null,
int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations)
: base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumnName), featureColumnName, weights, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
: base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
{
}

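For caller-side context, a hypothetical usage sketch of the renamed weight parameter (it assumes the public LightGbm extension on the binary classification catalog mirrors this internal constructor's parameters; not taken from this commit):

// Hypothetical usage sketch, not part of this change.
using Microsoft.ML;

var mlContext = new MLContext();
var trainer = mlContext.BinaryClassification.Trainers.LightGbm(
    labelColumnName: "Label",
    featureColumnName: "Features",
    exampleWeightColumnName: "Weight",   // renamed from "weights" in this change
    numberOfLeaves: 20,
    minimumExampleCountPerLeaf: 10,
    learningRate: 0.2,
    numberOfIterations: 100);
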
12 changes: 6 additions & 6 deletions src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs
@@ -43,20 +43,20 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, Options options)
/// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
/// <param name="labelColumnName">The name of The label column.</param>
/// <param name="featureColumnName">The name of the feature column.</param>
/// <param name="weights">The name for the column containing the initial weight.</param>
/// <param name="exampleWeightColumnName">The name for the column containing the initial weight.</param>
/// <param name="numberOfLeaves">The number of leaves to use.</param>
/// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="numberOfIterations">The number of iterations to use.</param>
internal LightGbmMulticlassTrainer(IHostEnvironment env,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
string weights = null,
string exampleWeightColumnName = null,
int? numberOfLeaves = null,
int? minimumExampleCountPerLeaf = null,
double? learningRate = null,
int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations)
: base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumnName), featureColumnName, weights, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
: base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
{
_numClass = -1;
}
@@ -182,14 +182,14 @@ private protected override void CheckAndUpdateParametersBeforeTraining(IChannel
Options["num_class"] = _numClass;
bool useSoftmax = false;

if (LightGbmTrainerOptions.UseSoftMax.HasValue)
useSoftmax = LightGbmTrainerOptions.UseSoftMax.Value;
if (LightGbmTrainerOptions.UseSoftmax.HasValue)
useSoftmax = LightGbmTrainerOptions.UseSoftmax.Value;
else
{
if (labels.Length >= _minDataToUseSoftmax)
useSoftmax = true;

ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseSoftMax) + " = " + useSoftmax);
ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseSoftmax) + " = " + useSoftmax);
}

if (useSoftmax)
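The branch that follows this auto-tuning block is truncated in the diff. In LightGBM terms, the softmax decision typically chooses between the "multiclass" and "multiclassova" objectives; the continuation sketched below is an assumption, not code shown in this commit.

// Assumed continuation (not visible in this diff): map the softmax decision
// onto LightGBM's two multiclass objectives.
if (useSoftmax)
    Options["objective"] = "multiclass";      // softmax across all classes
else
    Options["objective"] = "multiclassova";   // one-vs-all, one binary objective per class
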
6 changes: 3 additions & 3 deletions src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs
@@ -87,20 +87,20 @@ public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase<float, Regres
/// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
/// <param name="labelColumnName">The name of the label column.</param>
/// <param name="featureColumnName">The name of the feature column.</param>
/// <param name="weightsColumnName">The name for the column containing the initial weight.</param>
/// <param name="exampleWeightColumnName">The name for the column containing the initial weight.</param>
/// <param name="numberOfLeaves">The number of leaves to use.</param>
/// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="numberOfIterations">Number of iterations.</param>
internal LightGbmRegressorTrainer(IHostEnvironment env,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
string weightsColumnName = null,
string exampleWeightColumnName = null,
int? numberOfLeaves = null,
int? minimumExampleCountPerLeaf = null,
double? learningRate = null,
int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations)
: base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, weightsColumnName, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
: base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
{
}

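Finally, the renamed DART booster option from LightGbmArguments.cs could be exercised with a sketch like this (hypothetical; it assumes DartBooster.Options is constructed directly and that the other field names shown in the first file are unchanged):

// Hypothetical sketch, not part of this commit: using the renamed DART booster options.
var dart = new DartBooster.Options
{
    TreeDropFraction = 0.1,                 // maps to LightGBM "drop_rate"
    MaximumDroppedTreeCountPerRound = 2,    // renamed from MaximumDroppedTreesPerRound; maps to "max_drop"
    SkipDropFraction = 0.5                  // maps to "skip_drop"
};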