Skip to content

Commit

Permalink
LightGBM (#392)
Browse files Browse the repository at this point in the history
* LightGBM and test.

* add test baselines and nuget source for lightGBM binaries.

* Add entrypoint for lightGBM.

* add unsafe flag for release build.

* update nuget version.

* make lightgbm test single threaded.

* install gcc on OS machines to resolve dependencies on openmp that is needed by lightgbm native code.

* PR comments. Leave BREW and GCC in bash script to verify macOS tests work.

* remove brew and gcc from build script.

* PR feedback.

* disable test on macOS.

* disable test on macOS.

* PR feedback.
  • Loading branch information
codemzs authored Jun 26, 2018
1 parent 93ecbb7 commit 0a349f8
Show file tree
Hide file tree
Showing 116 changed files with 72,868 additions and 50 deletions.
7 changes: 7 additions & 0 deletions Microsoft.ML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "netstandard2.0", "netstanda
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Sweeper.Tests", "test\Microsoft.ML.Sweeper.Tests\Microsoft.ML.Sweeper.Tests.csproj", "{3DEB504D-7A07-48CE-91A2-8047461CB3D4}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.LightGBM", "src\Microsoft.ML.LightGBM\Microsoft.ML.LightGBM.csproj", "{001F3B4E-FBE4-4001-AFD2-A6A989CD1C25}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -222,6 +224,10 @@ Global
{3DEB504D-7A07-48CE-91A2-8047461CB3D4}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3DEB504D-7A07-48CE-91A2-8047461CB3D4}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3DEB504D-7A07-48CE-91A2-8047461CB3D4}.Release|Any CPU.Build.0 = Release|Any CPU
{001F3B4E-FBE4-4001-AFD2-A6A989CD1C25}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{001F3B4E-FBE4-4001-AFD2-A6A989CD1C25}.Debug|Any CPU.Build.0 = Debug|Any CPU
{001F3B4E-FBE4-4001-AFD2-A6A989CD1C25}.Release|Any CPU.ActiveCfg = Release|Any CPU
{001F3B4E-FBE4-4001-AFD2-A6A989CD1C25}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -260,6 +266,7 @@ Global
{487213C9-E8A9-4F94-85D7-28A05DBBFE3A} = {DEC8F776-49F7-4D87-836C-FE4DC057D08C}
{9252A8EB-ABFB-440C-AB4D-1D562753CE0F} = {487213C9-E8A9-4F94-85D7-28A05DBBFE3A}
{3DEB504D-7A07-48CE-91A2-8047461CB3D4} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{001F3B4E-FBE4-4001-AFD2-A6A989CD1C25} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
Expand Down
1 change: 1 addition & 0 deletions build/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@
<SystemCodeDomPackageVersion>4.4.0</SystemCodeDomPackageVersion>
<SystemReflectionEmitLightweightPackageVersion>4.3.0</SystemReflectionEmitLightweightPackageVersion>
<PublishSymbolsPackageVersion>1.0.0-beta-62824-02</PublishSymbolsPackageVersion>
<LightGBMPackageVersion>2.1.2.2</LightGBMPackageVersion>
</PropertyGroup>
</Project>
4 changes: 3 additions & 1 deletion docs/building/unix-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ macOS 10.12 or higher is needed to build dotnet/machinelearning.

On macOS a few components are needed which are not provided by a default developer setup:
* cmake 3.10.3
* gcc
* All the requirements necessary to run .NET Core 2.0 applications. To view macOS prerequisites click [here](https://docs.microsoft.com/en-us/dotnet/core/macos-prerequisites?tabs=netcore2x).

One way of obtaining CMake is via [Homebrew](http://brew.sh):
One way of obtaining CMake and gcc is via [Homebrew](http://brew.sh):
```sh
$ brew install cmake
$ brew install gcc
```
13 changes: 13 additions & 0 deletions pkg/Microsoft.ML.LightGBM/Microsoft.ML.LightGBM.nupkgproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk" DefaultTargets="Pack">

<!-- Package metadata: target framework and the description used for the package. -->
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<PackageDescription>ML.NET component for LightGBM</PackageDescription>
</PropertyGroup>

<!--
Dependencies: the Microsoft.ML packaging project plus the LightGBM package.
The LightGBM version comes from the LightGBMPackageVersion property
(declared in build/Dependencies.props).
-->
<ItemGroup>
<ProjectReference Include="../Microsoft.ML/Microsoft.ML.nupkgproj" />
<PackageReference Include="LightGBM" Version="$(LightGBMPackageVersion)" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<Project DefaultTargets="Pack">

<!-- Pulls in the contents of Microsoft.ML.LightGBM.nupkgproj; the default target is Pack. -->
<Import Project="Microsoft.ML.LightGBM.nupkgproj" />

</Project>
414 changes: 414 additions & 0 deletions src/Microsoft.ML.LightGBM/LightGbmArguments.cs

Large diffs are not rendered by default.

147 changes: 147 additions & 0 deletions src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.Runtime.FastTree;
using Microsoft.ML.Runtime.Internal.Calibration;
using Microsoft.ML.Runtime.Internal.Internallearn;
using Microsoft.ML.Runtime.LightGBM;
using Microsoft.ML.Runtime.Model;

// Registers LightGbmBinaryTrainer (configured through LightGbmArguments) under the binary-classifier,
// generic trainer and tree-ensemble trainer signatures, using its load name and short name.
[assembly: LoadableClass(LightGbmBinaryTrainer.Summary, typeof(LightGbmBinaryTrainer), typeof(LightGbmArguments),
new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureTreeEnsembleTrainer) },
"LightGBM Binary Classification", LightGbmBinaryTrainer.LoadNameValue, LightGbmBinaryTrainer.ShortName, DocName = "trainer/LightGBM.md")]

// Registers LightGbmBinaryPredictor as a model loader (SignatureLoadModel) keyed by its loader signature.
[assembly: LoadableClass(typeof(IPredictorProducing<float>), typeof(LightGbmBinaryPredictor), null, typeof(SignatureLoadModel),
"LightGBM Binary Executor",
LightGbmBinaryPredictor.LoaderSignature)]

// Registers the static LightGbm class as an entry-point module.
[assembly: LoadableClass(typeof(void), typeof(LightGbm), null, typeof(SignatureEntryPointModule), "LightGBM")]

namespace Microsoft.ML.Runtime.LightGBM
{
/// <summary>
/// Binary classification predictor over a LightGBM-trained tree ensemble.
/// Construction and saving are delegated to <see cref="FastTreePredictionWrapper"/>;
/// this type contributes its own version info and the model-loading factory.
/// </summary>
public sealed class LightGbmBinaryPredictor : FastTreePredictionWrapper
{
    public const string LoaderSignature = "LightGBMBinaryExec";
    public const string RegistrationName = "LightGBMBinaryPredictor";

    /// <summary>
    /// Creates a predictor from a freshly trained ensemble.
    /// </summary>
    /// <param name="env">Host environment passed to the base class.</param>
    /// <param name="trainedEnsemble">The trained tree ensemble.</param>
    /// <param name="featureCount">Number of features, passed to the base class.</param>
    /// <param name="innerArgs">Training arguments as a single string, passed to the base class.</param>
    internal LightGbmBinaryPredictor(IHostEnvironment env, FastTree.Internal.Ensemble trainedEnsemble, int featureCount, string innerArgs)
        : base(env, RegistrationName, trainedEnsemble, featureCount, innerArgs)
    {
    }

    /// <summary>
    /// Deserializing constructor; reads the predictor from <paramref name="ctx"/> via the base class.
    /// </summary>
    private LightGbmBinaryPredictor(IHostEnvironment env, ModelLoadContext ctx)
        : base(env, RegistrationName, ctx, GetVersionInfo())
    {
    }

    /// <summary>This predictor always reports binary classification.</summary>
    public override PredictionKind PredictionKind => PredictionKind.BinaryClassification;

    // Format versions at which the corresponding pieces of state started being serialized.
    protected override uint VerNumFeaturesSerialized => 0x00010002;

    protected override uint VerDefaultValueSerialized => 0x00010004;

    protected override uint VerCategoricalSplitSerialized => 0x00010005;

    /// <summary>
    /// Builds the version descriptor used when saving and loading this predictor.
    /// </summary>
    private static VersionInfo GetVersionInfo()
    {
        // REVIEW: can we decouple the version from FastTree predictor version ?
        //
        // Version history of verWrittenCur:
        //   0x00010001 - Initial
        //   0x00010002 - _numFeatures serialized
        //   0x00010003 - Ini content out of predictor
        //   0x00010004 - Add _defaultValueForMissing
        //   0x00010005 - Categorical splits (current)
        var info = new VersionInfo(
            modelSignature: "LGBBINCL",
            verWrittenCur: 0x00010005,
            verReadableCur: 0x00010004,
            verWeCanReadBack: 0x00010001,
            loaderSignature: LoaderSignature);
        return info;
    }

    /// <summary>
    /// Lets the base class write its state, then stamps this predictor's version info on the context.
    /// </summary>
    protected override void SaveCore(ModelSaveContext ctx)
    {
        base.SaveCore(ctx);

        var versionInfo = GetVersionInfo();
        ctx.SetVersionInfo(versionInfo);
    }

    /// <summary>
    /// Model-loading factory. Validates the arguments and the model header, loads the predictor,
    /// and then attempts to load an optional sub-model named "Calibrator".
    /// </summary>
    /// <returns>
    /// A <see cref="CalibratedPredictor"/> wrapping the loaded predictor when a calibrator was found;
    /// otherwise the loaded predictor itself.
    /// </returns>
    public static IPredictorProducing<float> Create(IHostEnvironment env, ModelLoadContext ctx)
    {
        Contracts.CheckValue(env, nameof(env));
        env.CheckValue(ctx, nameof(ctx));
        ctx.CheckAtModel(GetVersionInfo());

        var loaded = new LightGbmBinaryPredictor(env, ctx);

        ICalibrator storedCalibrator;
        ctx.LoadModelOrNull<ICalibrator, SignatureLoadModel>(env, out storedCalibrator, @"Calibrator");
        if (storedCalibrator != null)
            return new CalibratedPredictor(env, loaded, storedCalibrator);

        // No calibrator was stored alongside the model: hand back the raw predictor.
        return loaded;
    }
}

/// <summary>
/// LightGBM trainer for binary classification. Forces the "binary" objective and
/// wraps the resulting predictor in a Platt calibrator.
/// </summary>
public sealed class LightGbmBinaryTrainer : LightGbmTrainerBase<float, IPredictorWithFeatureWeights<float>>
{
    public const string Summary = "LightGBM Binary Classifier";
    public const string LoadNameValue = "LightGBMBinary";
    public const string ShortName = "LightGBM";

    /// <summary>
    /// Creates the trainer for the given environment and arguments.
    /// </summary>
    public LightGbmBinaryTrainer(IHostEnvironment env, LightGbmArguments args)
        : base(env, args, PredictionKind.BinaryClassification, "LGBBINCL")
    {
    }

    /// <summary>
    /// Sets the objective to "binary" and, when no metric has been configured,
    /// falls back to "binary_logloss".
    /// </summary>
    protected override void CheckAndUpdateParametersBeforeTraining(IChannel ch, RoleMappedData data, float[] labels, int[] groups)
    {
        Options["objective"] = "binary";

        // An existing metric setting is kept as-is.
        if (Options.ContainsKey("metric"))
            return;

        Options["metric"] = "binary_logloss";
    }

    /// <summary>
    /// Runs the base validation, then requires the label column to be boolean, key or R4.
    /// </summary>
    protected override void CheckDataValid(IChannel ch, RoleMappedData data)
    {
        Host.AssertValue(ch);
        base.CheckDataValid(ch, data);

        var labelType = data.Schema.Label.Type;
        bool labelTypeSupported = labelType.IsBool || labelType.IsKey || labelType == NumberType.R4;
        if (labelTypeSupported)
            return;

        throw ch.ExceptParam(nameof(data),
            $"Label column '{data.Schema.Label.Name}' is of type '{labelType}', but must be key, boolean or R4.");
    }

    /// <summary>
    /// Builds the calibrated predictor from the trained ensemble.
    /// Fails the host check if training has not produced an ensemble yet.
    /// </summary>
    public override IPredictorWithFeatureWeights<float> CreatePredictor()
    {
        Host.Check(TrainedEnsemble != null, "The predictor cannot be created before training is complete");

        var joinedOptions = LightGbmInterfaceUtils.JoinParameters(Options);
        var rawPredictor = new LightGbmBinaryPredictor(Host, TrainedEnsemble, FeatureCount, joinedOptions);

        // Fixed Platt calibrator parameters (-0.5, 0) — presumably slope and offset; confirm against PlattCalibrator.
        return new FeatureWeightsCalibratedPredictor(Host, rawPredictor, new PlattCalibrator(Host, -0.5, 0));
    }
}

/// <summary>
/// Entry points for training LightGBM models.
/// </summary>
public static partial class LightGbm
{
    /// <summary>
    /// Entry point that trains a LightGBM binary classifier.
    /// Validates the environment and input arguments, then delegates to
    /// <c>LearnerEntryPointsUtils.Train</c> with a <see cref="LightGbmBinaryTrainer"/> factory
    /// and lookups for the label and weight columns in the training data schema.
    /// </summary>
    /// <param name="env">Host environment; must not be null.</param>
    /// <param name="input">Trainer arguments; must not be null.</param>
    /// <returns>The binary classification output produced by the training utility.</returns>
    [TlcModule.EntryPoint(
        Name = "Trainers.LightGbmBinaryClassifier",
        Desc = "Train an LightGBM binary class model",
        UserName = LightGbmBinaryTrainer.Summary,
        ShortName = LightGbmBinaryTrainer.ShortName)]
    public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironment env, LightGbmArguments input)
    {
        Contracts.CheckValue(env, nameof(env));

        var trainingHost = env.Register("TrainLightGBM");
        trainingHost.CheckValue(input, nameof(input));
        EntryPointUtils.CheckInputArgs(trainingHost, input);

        return LearnerEntryPointsUtils.Train<LightGbmArguments, CommonOutputs.BinaryClassificationOutput>(
            trainingHost,
            input,
            () => new LightGbmBinaryTrainer(trainingHost, input),
            getLabel: () => LearnerEntryPointsUtils.FindColumn(trainingHost, input.TrainingData.Schema, input.LabelColumn),
            getWeight: () => LearnerEntryPointsUtils.FindColumn(trainingHost, input.TrainingData.Schema, input.WeightColumn));
    }
}
}
Loading

0 comments on commit 0a349f8

Please sign in to comment.