[GPU] Add support for linear tree with device=gpu #6567

Merged: 21 commits, Oct 18, 2024

Commits (21)

b030c63 basic gpu_linear_tree_learner implementation (dragonbra, Jul 18, 2024)
80f61e6 corresponding config of gpu linear tree (dragonbra, Jul 18, 2024)
bfb43de Merge branch 'microsoft:master' into gpu_linear_tree (dragonbra, Jul 23, 2024)
94ad3e5 Update src/io/config.cpp (dragonbra, Jul 25, 2024)
a197bbc Merge branch 'master' into gpu_linear_tree (shiyu1994, Aug 22, 2024)
c7d4a74 Merge branch 'master' into gpu_linear_tree (shiyu1994, Aug 27, 2024)
7d85909 Merge branch 'master' into gpu_linear_tree (shiyu1994, Sep 6, 2024)
44accd1 Merge branch 'master' into gpu_linear_tree (shiyu1994, Oct 1, 2024)
b8b1478 work around for gpu linear tree learner without gpu enabled (shiyu1994, Oct 1, 2024)
b16a127 add #endif (shiyu1994, Oct 2, 2024)
4fdbe07 add #ifdef USE_GPU (shiyu1994, Oct 2, 2024)
edfc8cb fix lint problems (shiyu1994, Oct 2, 2024)
c7a72a3 Merge branch 'master' into gpu_linear_tree (shiyu1994, Oct 8, 2024)
3a05c78 fix compilation when USE_GPU is OFF (shiyu1994, Oct 9, 2024)
bf54ad8 Merge branch 'gpu_linear_tree' of https://github.com/dragonbra/LightG… (shiyu1994, Oct 9, 2024)
7988c81 add destructor (shiyu1994, Oct 10, 2024)
5c0d651 Merge branch 'master' into gpu_linear_tree (shiyu1994, Oct 11, 2024)
d07d9cc add gpu_linear_tree_learner.cpp in make file list (shiyu1994, Oct 11, 2024)
27f3598 Merge branch 'gpu_linear_tree' of https://github.com/dragonbra/LightG… (shiyu1994, Oct 11, 2024)
8818b7b use template for linear tree learner (shiyu1994, Oct 18, 2024)
df7b55c Merge branch 'master' into gpu_linear_tree (shiyu1994, Oct 18, 2024)
4 changes: 2 additions & 2 deletions src/io/config.cpp
@@ -417,9 +417,9 @@ void Config::CheckParamConflict(const std::unordered_map<std::string, std::strin
   }
   // linear tree learner must be serial type and run on CPU device
   if (linear_tree) {
-    if (device_type != std::string("cpu")) {
+    if (device_type != std::string("cpu") && device_type != std::string("gpu")) {
       device_type = "cpu";
-      Log::Warning("Linear tree learner only works with CPU.");
+      Log::Warning("Linear tree learner only works with CPU and GPU. Falling back to CPU now.");
     }
     if (tree_learner != std::string("serial")) {
       tree_learner = "serial";
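The user-visible effect of this hunk: `linear_tree=true` with `device=gpu` is no longer silently rewritten to CPU, while any other device still falls back with a warning. Below is a minimal standalone sketch of that fallback rule; the free function and `main` are hypothetical, and only the accepted device names and warning text come from the diff.

```cpp
#include <iostream>
#include <string>

// Hypothetical distillation of the linear-tree branch of
// Config::CheckParamConflict() after this change.
void CheckLinearTreeDevice(bool linear_tree, std::string* device_type) {
  if (!linear_tree) return;
  if (*device_type != "cpu" && *device_type != "gpu") {
    *device_type = "cpu";
    std::cerr << "Warning: Linear tree learner only works with CPU and GPU."
              << " Falling back to CPU now.\n";
  }
}

int main() {
  std::string device = "gpu";
  CheckLinearTreeDevice(/*linear_tree=*/true, &device);
  std::cout << device << "\n";  // "gpu": accepted after this PR

  device = "cuda";
  CheckLinearTreeDevice(/*linear_tree=*/true, &device);
  std::cout << device << "\n";  // "cpu": other devices still fall back
}
```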
112 changes: 67 additions & 45 deletions src/treelearner/linear_tree_learner.cpp
@@ -10,20 +10,22 @@

 namespace LightGBM {
 
-void LinearTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian) {
-  SerialTreeLearner::Init(train_data, is_constant_hessian);
-  LinearTreeLearner::InitLinear(train_data, config_->num_leaves);
+template <typename TREE_LEARNER_TYPE>
+void LinearTreeLearner<TREE_LEARNER_TYPE>::Init(const Dataset* train_data, bool is_constant_hessian) {
+  TREE_LEARNER_TYPE::Init(train_data, is_constant_hessian);
+  LinearTreeLearner::InitLinear(train_data, this->config_->num_leaves);
 }
 
-void LinearTreeLearner::InitLinear(const Dataset* train_data, const int max_leaves) {
+template <typename TREE_LEARNER_TYPE>
+void LinearTreeLearner<TREE_LEARNER_TYPE>::InitLinear(const Dataset* train_data, const int max_leaves) {
   leaf_map_ = std::vector<int>(train_data->num_data(), -1);
   contains_nan_ = std::vector<int8_t>(train_data->num_features(), 0);
   // identify features containing nans
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int feat = 0; feat < train_data->num_features(); ++feat) {
-    auto bin_mapper = train_data_->FeatureBinMapper(feat);
+    auto bin_mapper = this->train_data_->FeatureBinMapper(feat);
     if (bin_mapper->bin_type() == BinType::NumericalBin) {
-      const float* feat_ptr = train_data_->raw_index(feat);
+      const float* feat_ptr = this->train_data_->raw_index(feat);
       for (int i = 0; i < train_data->num_data(); ++i) {
         if (std::isnan(feat_ptr[i])) {
           contains_nan_[feat] = 1;
@@ -40,7 +42,7 @@ void LinearTreeLearner::InitLinear(const Dataset* train_data, const int max_leav
     }
   }
   // preallocate the matrix used to calculate linear model coefficients
-  int max_num_feat = std::min(max_leaves, train_data_->num_numeric_features());
+  int max_num_feat = std::min(max_leaves, this->train_data_->num_numeric_features());
   XTHX_.clear();
   XTg_.clear();
   for (int i = 0; i < max_leaves; ++i) {
@@ -59,51 +61,52 @@ void LinearTreeLearner::InitLinear(const Dataset* train_data, const int max_leav
   }
 }
 
-Tree* LinearTreeLearner::Train(const score_t* gradients, const score_t *hessians, bool is_first_tree) {
+template <typename TREE_LEARNER_TYPE>
+Tree* LinearTreeLearner<TREE_LEARNER_TYPE>::Train(const score_t* gradients, const score_t *hessians, bool is_first_tree) {
   Common::FunctionTimer fun_timer("SerialTreeLearner::Train", global_timer);
-  gradients_ = gradients;
-  hessians_ = hessians;
+  this->gradients_ = gradients;
+  this->hessians_ = hessians;
   int num_threads = OMP_NUM_THREADS();
-  if (share_state_->num_threads != num_threads && share_state_->num_threads > 0) {
+  if (this->share_state_->num_threads != num_threads && this->share_state_->num_threads > 0) {
     Log::Warning(
         "Detected that num_threads changed during training (from %d to %d), "
         "it may cause unexpected errors.",
-        share_state_->num_threads, num_threads);
+        this->share_state_->num_threads, num_threads);
   }
-  share_state_->num_threads = num_threads;
+  this->share_state_->num_threads = num_threads;
 
   // some initial works before training
-  BeforeTrain();
+  this->BeforeTrain();
 
-  auto tree = std::unique_ptr<Tree>(new Tree(config_->num_leaves, true, true));
+  auto tree = std::unique_ptr<Tree>(new Tree(this->config_->num_leaves, true, true));
   auto tree_ptr = tree.get();
-  constraints_->ShareTreePointer(tree_ptr);
+  this->constraints_->ShareTreePointer(tree_ptr);
 
   // root leaf
   int left_leaf = 0;
   int cur_depth = 1;
   // only root leaf can be splitted on first time
   int right_leaf = -1;
 
-  int init_splits = ForceSplits(tree_ptr, &left_leaf, &right_leaf, &cur_depth);
+  int init_splits = this->ForceSplits(tree_ptr, &left_leaf, &right_leaf, &cur_depth);
 
-  for (int split = init_splits; split < config_->num_leaves - 1; ++split) {
+  for (int split = init_splits; split < this->config_->num_leaves - 1; ++split) {
     // some initial works before finding best split
-    if (BeforeFindBestSplit(tree_ptr, left_leaf, right_leaf)) {
+    if (this->BeforeFindBestSplit(tree_ptr, left_leaf, right_leaf)) {
       // find best threshold for every feature
-      FindBestSplits(tree_ptr);
+      this->FindBestSplits(tree_ptr);
     }
     // Get a leaf with max split gain
-    int best_leaf = static_cast<int>(ArrayArgs<SplitInfo>::ArgMax(best_split_per_leaf_));
+    int best_leaf = static_cast<int>(ArrayArgs<SplitInfo>::ArgMax(this->best_split_per_leaf_));
    // Get split information for best leaf
-    const SplitInfo& best_leaf_SplitInfo = best_split_per_leaf_[best_leaf];
+    const SplitInfo& best_leaf_SplitInfo = this->best_split_per_leaf_[best_leaf];
     // cannot split, quit
     if (best_leaf_SplitInfo.gain <= 0.0) {
       Log::Warning("No further splits with positive gain, best gain: %f", best_leaf_SplitInfo.gain);
       break;
     }
     // split tree with best leaf
-    Split(tree_ptr, best_leaf, &left_leaf, &right_leaf);
+    this->Split(tree_ptr, best_leaf, &left_leaf, &right_leaf);
     cur_depth = std::max(cur_depth, tree->leaf_depth(left_leaf));
   }
 
@@ -120,21 +123,22 @@ Tree* LinearTreeLearner::Train(const score_t* gradients, const score_t *hessians
   GetLeafMap(tree_ptr);
 
   if (has_nan) {
-    CalculateLinear<true>(tree_ptr, false, gradients_, hessians_, is_first_tree);
+    CalculateLinear<true>(tree_ptr, false, this->gradients_, this->hessians_, is_first_tree);
   } else {
-    CalculateLinear<false>(tree_ptr, false, gradients_, hessians_, is_first_tree);
+    CalculateLinear<false>(tree_ptr, false, this->gradients_, this->hessians_, is_first_tree);
   }
 
   Log::Debug("Trained a tree with leaves = %d and depth = %d", tree->num_leaves(), cur_depth);
   return tree.release();
 }
 
-Tree* LinearTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t *hessians) const {
-  auto tree = SerialTreeLearner::FitByExistingTree(old_tree, gradients, hessians);
+template <typename TREE_LEARNER_TYPE>
+Tree* LinearTreeLearner<TREE_LEARNER_TYPE>::FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t *hessians) const {
+  auto tree = TREE_LEARNER_TYPE::FitByExistingTree(old_tree, gradients, hessians);
   bool has_nan = false;
   if (any_nan_) {
     for (int i = 0; i < tree->num_leaves() - 1 ; ++i) {
-      if (contains_nan_[train_data_->InnerFeatureIndex(tree->split_feature(i))]) {
+      if (contains_nan_[this->train_data_->InnerFeatureIndex(tree->split_feature(i))]) {
         has_nan = true;
         break;
       }
@@ -149,28 +153,31 @@ Tree* LinearTreeLearner::FitByExistingTree(const Tree* old_tree, const score_t*
   return tree;
 }
 
-Tree* LinearTreeLearner::FitByExistingTree(const Tree* old_tree, const std::vector<int>& leaf_pred,
+template <typename TREE_LEARNER_TYPE>
+Tree* LinearTreeLearner<TREE_LEARNER_TYPE>::FitByExistingTree(const Tree* old_tree, const std::vector<int>& leaf_pred,
                                            const score_t* gradients, const score_t *hessians) const {
-  data_partition_->ResetByLeafPred(leaf_pred, old_tree->num_leaves());
+  this->data_partition_->ResetByLeafPred(leaf_pred, old_tree->num_leaves());
   return LinearTreeLearner::FitByExistingTree(old_tree, gradients, hessians);
 }
 
-void LinearTreeLearner::GetLeafMap(Tree* tree) const {
+template <typename TREE_LEARNER_TYPE>
+void LinearTreeLearner<TREE_LEARNER_TYPE>::GetLeafMap(Tree* tree) const {
   std::fill(leaf_map_.begin(), leaf_map_.end(), -1);
   // map data to leaf number
-  const data_size_t* ind = data_partition_->indices();
+  const data_size_t* ind = this->data_partition_->indices();
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(dynamic)
   for (int i = 0; i < tree->num_leaves(); ++i) {
-    data_size_t idx = data_partition_->leaf_begin(i);
-    for (int j = 0; j < data_partition_->leaf_count(i); ++j) {
+    data_size_t idx = this->data_partition_->leaf_begin(i);
+    for (int j = 0; j < this->data_partition_->leaf_count(i); ++j) {
       leaf_map_[ind[idx + j]] = i;
     }
   }
 }
 
 
-template<bool HAS_NAN>
-void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t* gradients, const score_t* hessians, bool is_first_tree) const {
+template <typename TREE_LEARNER_TYPE>
+template <bool HAS_NAN>
+void LinearTreeLearner<TREE_LEARNER_TYPE>::CalculateLinear(Tree* tree, bool is_refit, const score_t* gradients, const score_t* hessians, bool is_first_tree) const {
   tree->SetIsLinear(true);
   int num_leaves = tree->num_leaves();
   int num_threads = OMP_NUM_THREADS();
@@ -209,11 +216,11 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
     std::vector<int> numerical_features;
     std::vector<const float*> data_ptr;
     for (size_t j = 0; j < raw_features.size(); ++j) {
-      int feat = train_data_->InnerFeatureIndex(raw_features[j]);
-      auto bin_mapper = train_data_->FeatureBinMapper(feat);
+      int feat = this->train_data_->InnerFeatureIndex(raw_features[j]);
+      auto bin_mapper = this->train_data_->FeatureBinMapper(feat);
       if (bin_mapper->bin_type() == BinType::NumericalBin) {
         numerical_features.push_back(feat);
-        data_ptr.push_back(train_data_->raw_index(feat));
+        data_ptr.push_back(this->train_data_->raw_index(feat));
       }
     }
     leaf_features.push_back(numerical_features);
@@ -245,12 +252,12 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
     }
   }
   OMP_INIT_EX();
-#pragma omp parallel num_threads(OMP_NUM_THREADS()) if (num_data_ > 1024)
+#pragma omp parallel num_threads(OMP_NUM_THREADS()) if (this->num_data_ > 1024)
   {
     std::vector<float> curr_row(max_num_features + 1);
     int tid = omp_get_thread_num();
 #pragma omp for schedule(static)
-    for (int i = 0; i < num_data_; ++i) {
+    for (int i = 0; i < this->num_data_; ++i) {
       OMP_LOOP_EX_BEGIN();
       int leaf_num = leaf_map_[i];
       if (leaf_num < 0) {
@@ -312,11 +319,11 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
   }
   if (!HAS_NAN) {
     for (int leaf_num = 0; leaf_num < num_leaves; ++leaf_num) {
-      total_nonzero[leaf_num] = data_partition_->leaf_count(leaf_num);
+      total_nonzero[leaf_num] = this->data_partition_->leaf_count(leaf_num);
     }
   }
   double shrinkage = tree->shrinkage();
-  double decay_rate = config_->refit_decay_rate;
+  double decay_rate = this->config_->refit_decay_rate;
   // copy into eigen matrices and solve
 #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int leaf_num = 0; leaf_num < num_leaves; ++leaf_num) {
@@ -340,7 +347,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
         XTHX_mat(feat1, feat2) = XTHX_[leaf_num][j];
         XTHX_mat(feat2, feat1) = XTHX_mat(feat1, feat2);
         if ((feat1 == feat2) && (feat1 < num_feat)) {
-          XTHX_mat(feat1, feat2) += config_->linear_lambda;
+          XTHX_mat(feat1, feat2) += this->config_->linear_lambda;
         }
         ++j;
       }
@@ -366,7 +373,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
       tree->SetLeafFeaturesInner(leaf_num, features_new);
       std::vector<int> features_raw(features_new.size());
       for (size_t i = 0; i < features_new.size(); ++i) {
-        features_raw[i] = train_data_->RealFeatureIndex(features_new[i]);
+        features_raw[i] = this->train_data_->RealFeatureIndex(features_new[i]);
       }
       tree->SetLeafFeatures(leaf_num, features_raw);
       tree->SetLeafCoeffs(leaf_num, coeffs_vec);
@@ -378,4 +385,19 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t
     }
   }
 }
+
+template void LinearTreeLearner<SerialTreeLearner>::Init(const Dataset* train_data, bool is_constant_hessian);
+template void LinearTreeLearner<SerialTreeLearner>::InitLinear(const Dataset* train_data, const int max_leaves);
+template Tree* LinearTreeLearner<SerialTreeLearner>::Train(const score_t* gradients, const score_t *hessians, bool is_first_tree);
+template Tree* LinearTreeLearner<SerialTreeLearner>::FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t *hessians) const;
+template Tree* LinearTreeLearner<SerialTreeLearner>::FitByExistingTree(const Tree* old_tree, const std::vector<int>& leaf_pred,
+                                                                       const score_t* gradients, const score_t *hessians) const;
+
+template void LinearTreeLearner<GPUTreeLearner>::Init(const Dataset* train_data, bool is_constant_hessian);
+template void LinearTreeLearner<GPUTreeLearner>::InitLinear(const Dataset* train_data, const int max_leaves);
+template Tree* LinearTreeLearner<GPUTreeLearner>::Train(const score_t* gradients, const score_t *hessians, bool is_first_tree);
+template Tree* LinearTreeLearner<GPUTreeLearner>::FitByExistingTree(const Tree* old_tree, const score_t* gradients, const score_t *hessians) const;
+template Tree* LinearTreeLearner<GPUTreeLearner>::FitByExistingTree(const Tree* old_tree, const std::vector<int>& leaf_pred,
+                                                                    const score_t* gradients, const score_t *hessians) const;
+
 }  // namespace LightGBM
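Two things are worth noting in this file. First, the class is now a mixin: `LinearTreeLearner<TREE_LEARNER_TYPE>` derives from its template parameter, so one copy of the linear-leaf code runs on top of either the serial or the GPU learner. Second, because the member definitions stay in this `.cpp` file, the translation unit must end with explicit instantiations for both supported bases; without them, the factory in `tree_learner.cpp` would hit undefined symbols at link time. A self-contained sketch of the pattern (all names below are illustrative stand-ins, not the real LightGBM classes):

```cpp
#include <iostream>

// Stand-ins for the two base learners (hypothetical).
struct SerialBase { const char* Device() { return "cpu"; } };
struct GpuBase    { const char* Device() { return "gpu"; } };

// Mixin: one implementation of the linear-leaf logic over any base.
template <typename BASE>
struct LinearMixin : public BASE {
  void Train() {
    // BASE's members are dependent names, hence the explicit qualification,
    // mirroring the this-> qualifications added throughout the diff.
    std::cout << "train on " << BASE::Device() << ", then fit leaf models\n";
  }
};

// If Train() were defined in a separate .cpp, that file would need these
// explicit instantiations so the linker can resolve LinearMixin<...>::Train,
// mirroring the block of `template ...` lines that closes the file above.
template struct LinearMixin<SerialBase>;
template struct LinearMixin<GpuBase>;

int main() {
  LinearMixin<SerialBase>{}.Train();
  LinearMixin<GpuBase>{}.Train();
}
```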
16 changes: 9 additions & 7 deletions src/treelearner/linear_tree_learner.h
@@ -11,13 +11,15 @@
 #include <random>
 #include <vector>
 
+#include "gpu_tree_learner.h"
 #include "serial_tree_learner.h"
 
 namespace LightGBM {
 
-class LinearTreeLearner: public SerialTreeLearner {
+template <typename TREE_LEARNER_TYPE>
+class LinearTreeLearner: public TREE_LEARNER_TYPE {
  public:
-  explicit LinearTreeLearner(const Config* config) : SerialTreeLearner(config) {}
+  explicit LinearTreeLearner(const Config* config) : TREE_LEARNER_TYPE(config) {}
 
   void Init(const Dataset* train_data, bool is_constant_hessian) override;
 
@@ -38,12 +40,12 @@ class LinearTreeLearner: public SerialTreeLearner {

   void AddPredictionToScore(const Tree* tree,
                             double* out_score) const override {
-    CHECK_LE(tree->num_leaves(), data_partition_->num_leaves());
+    CHECK_LE(tree->num_leaves(), this->data_partition_->num_leaves());
     bool has_nan = false;
     if (any_nan_) {
       for (int i = 0; i < tree->num_leaves() - 1 ; ++i) {
         // use split_feature because split_feature_inner doesn't work when refitting existing tree
-        if (contains_nan_[train_data_->InnerFeatureIndex(tree->split_feature(i))]) {
+        if (contains_nan_[this->train_data_->InnerFeatureIndex(tree->split_feature(i))]) {
           has_nan = true;
           break;
         }
@@ -69,13 +71,13 @@ class LinearTreeLearner: public SerialTreeLearner {
       leaf_coeff[leaf_num] = tree->LeafCoeffs(leaf_num);
       leaf_output[leaf_num] = tree->LeafOutput(leaf_num);
       for (int feat : tree->LeafFeaturesInner(leaf_num)) {
-        feat_ptr[leaf_num].push_back(train_data_->raw_index(feat));
+        feat_ptr[leaf_num].push_back(this->train_data_->raw_index(feat));
       }
       leaf_num_features[leaf_num] = static_cast<int>(feat_ptr[leaf_num].size());
     }
     OMP_INIT_EX();
-#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) if (num_data_ > 1024)
-    for (int i = 0; i < num_data_; ++i) {
+#pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) if (this->num_data_ > 1024)
+    for (int i = 0; i < this->num_data_; ++i) {
       OMP_LOOP_EX_BEGIN();
       int leaf_num = leaf_map_[i];
       if (leaf_num < 0) {
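Most of the churn in this header and in `linear_tree_learner.cpp` is mechanical: once the base class is the template parameter `TREE_LEARNER_TYPE`, inherited members such as `train_data_` and `num_data_` live in a dependent base, and C++ two-phase lookup no longer finds them by unqualified name, so every use gains a `this->`. (Members declared by `LinearTreeLearner` itself, such as `leaf_map_` and `contains_nan_`, stay unqualified.) A minimal reproduction, with hypothetical names:

```cpp
struct Base { int data_ = 41; };

template <typename BASE>
struct Derived : public BASE {
  int Next() {
    // return data_ + 1;     // error: unqualified lookup does not search the
    //                       // dependent base BASE at template-definition time
    return this->data_ + 1;  // OK: this-> defers lookup to instantiation
  }
};

int main() { return Derived<Base>{}.Next() == 42 ? 0 : 1; }
```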
8 changes: 6 additions & 2 deletions src/treelearner/tree_learner.cpp
@@ -17,7 +17,7 @@ TreeLearner* TreeLearner::CreateTreeLearner(const std::string& learner_type, con
if (device_type == std::string("cpu")) {
if (learner_type == std::string("serial")) {
if (config->linear_tree) {
return new LinearTreeLearner(config);
return new LinearTreeLearner<SerialTreeLearner>(config);
} else {
return new SerialTreeLearner(config);
}
@@ -30,7 +30,11 @@ TreeLearner* TreeLearner::CreateTreeLearner(const std::string& learner_type, con
     }
   } else if (device_type == std::string("gpu")) {
     if (learner_type == std::string("serial")) {
-      return new GPUTreeLearner(config);
+      if (config->linear_tree) {
+        return new LinearTreeLearner<GPUTreeLearner>(config);
+      } else {
+        return new GPUTreeLearner(config);
+      }
     } else if (learner_type == std::string("feature")) {
       return new FeatureParallelTreeLearner<GPUTreeLearner>(config);
     } else if (learner_type == std::string("data")) {
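With this factory change, the GPU serial path mirrors the CPU one: `linear_tree=true` wraps the device's learner in the mixin instead of being ignored. A condensed sketch of the dispatch; the types are hypothetical stand-ins and the parallel-learner branches are elided:

```cpp
#include <memory>
#include <string>

// Hypothetical stand-ins for the real learner classes.
struct TreeLearner { virtual ~TreeLearner() = default; };
struct SerialTreeLearner : TreeLearner {};
struct GPUTreeLearner : TreeLearner {};
template <typename BASE> struct LinearTreeLearner : BASE {};

std::unique_ptr<TreeLearner> CreateSerial(const std::string& device,
                                          bool linear_tree) {
  if (device == "cpu") {
    if (linear_tree) return std::make_unique<LinearTreeLearner<SerialTreeLearner>>();
    return std::make_unique<SerialTreeLearner>();
  }
  if (device == "gpu") {
    // New in this PR: previously this branch returned GPUTreeLearner
    // unconditionally, so linear_tree never reached the GPU learner.
    if (linear_tree) return std::make_unique<LinearTreeLearner<GPUTreeLearner>>();
    return std::make_unique<GPUTreeLearner>();
  }
  return nullptr;  // other devices elided in this sketch
}

int main() {
  auto learner = CreateSerial("gpu", /*linear_tree=*/true);
  return learner ? 0 : 1;
}
```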