Optimization of row-wise histogram construction #3522

Merged (61 commits, Nov 13, 2020)

Changes from 44 commits

Commits
bfcffab: store without offset in multi_val_dense_bin (shiyu1994, Sep 17, 2020)
cffde72: fix offset bug (shiyu1994, Sep 17, 2020)
fc6daf2: add comment for offset (shiyu1994, Sep 17, 2020)
788101e: add comment for bin type selection (shiyu1994, Sep 17, 2020)
ed70174: faster operations for offset (shiyu1994, Sep 18, 2020)
fab5cb1: keep most freq bin in histogram for multi val dense (shiyu1994, Sep 18, 2020)
8e011e2: use original feature iterators (shiyu1994, Sep 22, 2020)
5f40fe5: consider 9 cases (3 x 3) for multi val bin construction (shiyu1994, Sep 23, 2020)
a9425a6: merge master (Oct 29, 2020)
4232b6c: fix dense bin setting (shiyu1994, Oct 30, 2020)
be26f9f: fix bin data in multi val group (shiyu1994, Nov 1, 2020)
a36aba6: fix offset of the first feature histogram (shiyu1994, Nov 2, 2020)
0a49aa3: Merge branch 'master' of https://github.com/microsoft/LightGBM into m… (shiyu1994, Nov 2, 2020)
3effcac: use float hist buf (shiyu1994, Nov 2, 2020)
ac501c5: avx in histogram construction (shiyu1994, Nov 2, 2020)
2bd0b30: use avx for hist construction without prefetch (shiyu1994, Nov 2, 2020)
68042a6: vectorize bin extraction (shiyu1994, Nov 2, 2020)
4fab10d: use only 128 vec (shiyu1994, Nov 3, 2020)
2c8346b: use avx2 (shiyu1994, Nov 3, 2020)
6c87fec: use vectorization for sparse row wise (shiyu1994, Nov 3, 2020)
8aaf0cc: add bit size for multi val dense bin (shiyu1994, Nov 3, 2020)
3238061: float with no vectorization (shiyu1994, Nov 3, 2020)
dcde744: change multithreading strategy to dynamic (shiyu1994, Nov 4, 2020)
e411060: remove intrinsic header (shiyu1994, Nov 4, 2020)
6b007a1: fix dense multi val col copy (shiyu1994, Nov 4, 2020)
99dc025: remove bit size (shiyu1994, Nov 4, 2020)
23b8e82: use large enough block size when the bin number is large (shiyu1994, Nov 4, 2020)
afede0d: calc min block size by sparsity (shiyu1994, Nov 4, 2020)
6263f03: rescale gradients (shiyu1994, Nov 4, 2020)
f64bb8c: rollback gradients scaling (shiyu1994, Nov 4, 2020)
fc385f2: single precision histogram buffer as an option (shiyu1994, Nov 5, 2020)
44a6896: add float hist buffer with thread buffer (shiyu1994, Nov 5, 2020)
2449fda: fix setting zero in hist data (shiyu1994, Nov 6, 2020)
0ce28ed: fix hist begin pointer in tree learners (shiyu1994, Nov 6, 2020)
d525ca0: remove debug logs (shiyu1994, Nov 6, 2020)
bb17cfd: remove omp simd (shiyu1994, Nov 6, 2020)
994e04d: update Makevars of R-package (shiyu1994, Nov 6, 2020)
347e608: fix feature group binary storing (shiyu1994, Nov 6, 2020)
c19d52a: two row wise for double hist buffer (shiyu1994, Nov 10, 2020)
cef713f: add subfeature for two row wise (shiyu1994, Nov 10, 2020)
9529d02: remove useless code and fix two row wise (shiyu1994, Nov 11, 2020)
4d5fa18: refactor code (shiyu1994, Nov 11, 2020)
72c82d4: grouping the dense feature groups can get sparse multi val bin (shiyu1994, Nov 11, 2020)
1ede693: clean format problems (shiyu1994, Nov 11, 2020)
02994e4: one thread for two blocks in sep row wise (shiyu1994, Nov 12, 2020)
840c3a4: use ordered gradients for sep row wise (shiyu1994, Nov 12, 2020)
de20c25: fix grad ptr (shiyu1994, Nov 12, 2020)
286c593: ordered grad with combined block for sep row wise (shiyu1994, Nov 12, 2020)
0ebfa47: fix block threading (shiyu1994, Nov 12, 2020)
17c7ae6: use the same min block size (shiyu1994, Nov 12, 2020)
6ea41da: rollback share min block size (shiyu1994, Nov 12, 2020)
7315d21: remove logs (shiyu1994, Nov 12, 2020)
b7b03b5: Update src/io/dataset.cpp (shiyu1994, Nov 12, 2020)
e7e45ab: fix parameter description (shiyu1994, Nov 12, 2020)
605657c: Merge branch 'multi_val_opt' of https://github.com/shiyu1994/LightGBM… (shiyu1994, Nov 12, 2020)
d420208: remove sep_row_wise (shiyu1994, Nov 12, 2020)
d198bdb: remove check codes (shiyu1994, Nov 12, 2020)
54716e4: add check for empty multi val bin (shiyu1994, Nov 12, 2020)
8117a1f: fix lint error (shiyu1994, Nov 12, 2020)
56b5f93: rollback changes in config.h (shiyu1994, Nov 12, 2020)
1e10eb1: Apply suggestions from code review (guolinke, Nov 13, 2020)
1 change: 1 addition & 0 deletions R-package/src/Makevars.in
@@ -36,6 +36,7 @@ OBJECTS = \
io/json11.o \
io/metadata.o \
io/parser.o \
io/train_share_states.o \
io/tree.o \
metric/dcg_calculator.o \
metric/metric.o \
1 change: 1 addition & 0 deletions R-package/src/Makevars.win.in
@@ -37,6 +37,7 @@ OBJECTS = \
io/json11.o \
io/metadata.o \
io/parser.o \
io/train_share_states.o \
io/tree.o \
metric/dcg_calculator.o \
metric/metric.o \
12 changes: 8 additions & 4 deletions include/LightGBM/bin.h
@@ -399,6 +399,7 @@ class MultiValBin {

virtual double num_element_per_row() const = 0;

virtual const std::vector<uint32_t>& offsets() const = 0;

virtual void PushOneRow(int tid, data_size_t idx, const std::vector<uint32_t>& values) = 0;

@@ -408,7 +409,8 @@

virtual MultiValBin* CreateLike(data_size_t num_data, int num_bin,
int num_feature,
double estimate_element_per_row) const = 0;
double estimate_element_per_row,
const std::vector<uint32_t>& offsets) const = 0;

virtual void CopySubcol(const MultiValBin* full_bin,
const std::vector<int>& used_feature_index,
@@ -417,7 +419,7 @@
const std::vector<uint32_t>& delta) = 0;

virtual void ReSize(data_size_t num_data, int num_bin, int num_feature,
double estimate_element_per_row) = 0;
double estimate_element_per_row, const std::vector<uint32_t>& offsets) = 0;

virtual void CopySubrowAndSubcol(
const MultiValBin* full_bin, const data_size_t* used_indices,
@@ -447,13 +449,15 @@
virtual bool IsSparse() = 0;

static MultiValBin* CreateMultiValBin(data_size_t num_data, int num_bin,
int num_feature, double sparse_rate);
int num_feature, double sparse_rate, const std::vector<uint32_t>& offsets);

static MultiValBin* CreateMultiValDenseBin(data_size_t num_data, int num_bin,
int num_feature);
int num_feature, const std::vector<uint32_t>& offsets);

static MultiValBin* CreateMultiValSparseBin(data_size_t num_data, int num_bin, double estimate_element_per_row);

static constexpr double multi_val_bin_sparse_threshold = 0.25f;

virtual MultiValBin* Clone() = 0;
};

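A note on the dispatch: this diff shows only the factory signatures, but given the new offsets parameter and the 0.25 threshold above, the dense/sparse choice can be sketched as below. This is an illustration under those assumptions, not the body that lives in the implementation files.

// Sketch only: pick the multi-value bin representation from the sparse rate,
// assuming bin.h above is included. The elements-per-row estimate for the
// sparse case is illustrative.
MultiValBin* MultiValBin::CreateMultiValBin(data_size_t num_data, int num_bin,
                                            int num_feature, double sparse_rate,
                                            const std::vector<uint32_t>& offsets) {
  if (sparse_rate >= multi_val_bin_sparse_threshold) {
    // Sparse rows: size by the expected number of nonzero elements per row.
    const double estimate_element_per_row = (1.0 - sparse_rate) * num_feature;
    return CreateMultiValSparseBin(num_data, num_bin, estimate_element_per_row);
  }
  // Mostly dense rows: a dense layout needs the per-feature offsets up front.
  return CreateMultiValDenseBin(num_data, num_bin, num_feature, offsets);
}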
20 changes: 16 additions & 4 deletions include/LightGBM/config.h
@@ -235,8 +235,8 @@ struct Config {
// descl2 = the number of columns is large, or the total number of bins is large
// descl2 = ``num_threads`` is large, e.g. ``> 20``
// descl2 = you want to reduce memory cost
// desc = **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
// desc = **Note**: this parameter cannot be used at the same time with ``force_row_wise``, choose only one of them
// desc = **Note**: when ``force_col_wise``, ``force_row_wise`` and ``force_two_row_wise`` are all ``false``, LightGBM will first try all of them, and then use the fastest one. To remove the overhead of testing, set the fastest one to ``true`` manually
// desc = **Note**: this parameter cannot be used together with ``force_row_wise`` or ``force_two_row_wise``; choose only one of them
bool force_col_wise = false;

// desc = used only with ``cpu`` device type
@@ -246,10 +246,22 @@
// descl2 = ``num_threads`` is relatively small, e.g. ``<= 16``
// descl2 = you want to use small ``bagging_fraction`` or ``goss`` boosting to speed up
// desc = **Note**: setting this to ``true`` will double the memory cost for the Dataset object. If you do not have enough memory, you can try setting ``force_col_wise=true``
// desc = **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
// desc = **Note**: this parameter cannot be used at the same time with ``force_col_wise``, choose only one of them
// desc = **Note**: when ``force_col_wise``, ``force_row_wise`` and ``force_two_row_wise`` are all ``false``, LightGBM will first try all of them, and then use the fastest one. To remove the overhead of testing, set the fastest one to ``true`` manually
// desc = **Note**: this parameter cannot be used together with ``force_col_wise`` or ``force_two_row_wise``; choose only one of them
bool force_row_wise = false;

// desc = used only with ``cpu`` device type
// desc = set this to ``true`` to force separate row-wise histogram building for sparse and dense features
// desc = enabling this is recommended when:
// descl2 = there are both sparse and dense features in the dataset
// descl2 = the number of data points is large, and the total number of bins is relatively small
// descl2 = ``num_threads`` is relatively small, e.g. ``<= 16``
// descl2 = you want to use small ``bagging_fraction`` or ``goss`` boosting to speed up
// desc = **Note**: setting this to ``true`` will double the memory cost for the Dataset object. If you do not have enough memory, you can try setting ``force_col_wise=true``
// desc = **Note**: when ``force_col_wise``, ``force_row_wise`` and ``force_two_row_wise`` are all ``false``, LightGBM will first try all of them, and then use the fastest one. To remove the overhead of testing, set the fastest one to ``true`` manually
// desc = **Note**: this parameter cannot be used together with ``force_col_wise`` or ``force_row_wise``; choose only one of them
bool force_two_row_wise = false;
Collaborator review comment on this line: maybe ``force_sep_row_wise``?

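Taken together, the three ``force_*`` descriptions above imply a simple contract: at most one layout may be forced, and with none forced LightGBM times each layout and keeps the fastest. A minimal sketch of that contract follows; the enum, the function, and the BenchmarkAndPickFastest helper are hypothetical names for illustration, not LightGBM's actual control flow.

#include <stdexcept>

enum class HistLayout { kColWise, kRowWise, kTwoRowWise };

HistLayout BenchmarkAndPickFastest();  // hypothetical: time one iteration per layout

HistLayout ChooseLayout(bool force_col_wise, bool force_row_wise,
                        bool force_two_row_wise) {
  const int n_forced = static_cast<int>(force_col_wise) +
                       static_cast<int>(force_row_wise) +
                       static_cast<int>(force_two_row_wise);
  if (n_forced > 1) {
    // "choose only one of them"
    throw std::runtime_error("only one of force_col_wise, force_row_wise, "
                             "force_two_row_wise may be true");
  }
  if (force_col_wise) return HistLayout::kColWise;
  if (force_row_wise) return HistLayout::kRowWise;
  if (force_two_row_wise) return HistLayout::kTwoRowWise;
  // None forced: try all layouts and use the fastest, paying a one-time
  // testing overhead (which the config notes suggest removing by forcing one).
  return BenchmarkAndPickFastest();
}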
// alias = hist_pool_size
// desc = max cache size in MB for historical histogram
// desc = ``< 0`` means no limit
60 changes: 6 additions & 54 deletions include/LightGBM/dataset.h
@@ -8,6 +8,7 @@
#include <LightGBM/config.h>
#include <LightGBM/feature_group.h>
#include <LightGBM/meta.h>
#include <LightGBM/train_share_states.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/text_reader.h>
@@ -275,57 +276,6 @@ class Parser {
static Parser* CreateParser(const char* filename, bool header, int num_features, int label_idx);
};

struct TrainingShareStates {
int num_threads = 0;
bool is_colwise = true;
bool is_use_subcol = false;
bool is_use_subrow = false;
bool is_subrow_copied = false;
bool is_constant_hessian = true;
const data_size_t* bagging_use_indices;
data_size_t bagging_indices_cnt;
int num_bin_aligned;
std::unique_ptr<MultiValBin> multi_val_bin;
std::unique_ptr<MultiValBin> multi_val_bin_subset;
std::vector<uint32_t> hist_move_src;
std::vector<uint32_t> hist_move_dest;
std::vector<uint32_t> hist_move_size;
std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>
hist_buf;

void SetMultiValBin(MultiValBin* bin) {
num_threads = OMP_NUM_THREADS();
if (bin == nullptr) {
return;
}
multi_val_bin.reset(bin);
num_bin_aligned =
(bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
size_t new_size = static_cast<size_t>(num_bin_aligned) * 2 * num_threads;
if (new_size > hist_buf.size()) {
hist_buf.resize(static_cast<size_t>(num_bin_aligned) * 2 * num_threads);
}
}

hist_t* TempBuf() {
if (!is_use_subcol) {
return nullptr;
}
return hist_buf.data() + hist_buf.size() - num_bin_aligned * 2;
}

void HistMove(const hist_t* src, hist_t* dest) {
if (!is_use_subcol) {
return;
}
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(hist_move_src.size()); ++i) {
std::copy_n(src + hist_move_src[i], hist_move_size[i],
dest + hist_move_dest[i]);
}
}
};

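The struct removed above now lives in its own translation unit (hence the new io/train_share_states.o entries in both Makevars files). Its buffer sizing rounds the bin count up to a multiple of kAlignedSize; a self-contained sketch of that arithmetic, with assumed values for the constants:

#include <cstddef>
#include <cstdio>

int main() {
  const int kAlignedSize = 32;   // assumed alignment, for illustration only
  const int num_bin = 1000;      // assumed total bin count
  // Round num_bin up to the next multiple of kAlignedSize, as in SetMultiValBin().
  const int num_bin_aligned =
      (num_bin + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
  const int num_threads = 8;     // assumed OMP_NUM_THREADS()
  // Two hist_t slots per bin (gradient and hessian), one stripe per thread.
  const size_t hist_buf_size =
      static_cast<size_t>(num_bin_aligned) * 2 * num_threads;
  std::printf("%d %zu\n", num_bin_aligned, hist_buf_size);  // prints "1024 16384"
}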
/*! \brief The main class of data set,
 * which is used for training or validation
*/
@@ -444,14 +394,16 @@

void CopySubrow(const Dataset* fullset, const data_size_t* used_indices, data_size_t num_used_indices, bool need_meta_data);

MultiValBin* GetMultiBinFromSparseFeatures() const;
MultiValBin* GetMultiBinFromSparseFeatures(const std::vector<uint32_t>& offsets) const;

MultiValBin* GetMultiBinFromDenseFeatures(const std::vector<uint32_t>& offsets) const;

MultiValBin* GetMultiBinFromAllFeatures() const;
MultiValBin* GetMultiBinFromAllFeatures(const std::vector<uint32_t>& offsets) const;

TrainingShareStates* GetShareStates(
score_t* gradients, score_t* hessians,
const std::vector<int8_t>& is_feature_used, bool is_constant_hessian,
bool force_colwise, bool force_rowwise) const;
bool force_colwise, bool force_rowwise, bool force_two_rowwise) const;

LIGHTGBM_EXPORT void FinishLoad();

62 changes: 56 additions & 6 deletions include/LightGBM/feature_group.h
@@ -18,12 +18,16 @@ namespace LightGBM {

class Dataset;
class DatasetLoader;
class TrainingShareStates;
class MultiValBinWrapper;
/*! \brief Used to store data and provide some operations on one feature
 * group */
class FeatureGroup {
public:
friend Dataset;
friend DatasetLoader;
friend TrainingShareStates;
friend MultiValBinWrapper;
/*!
* \brief Constructor
* \param num_feature number of features of this group
@@ -35,15 +39,27 @@
std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
data_size_t num_data) : num_feature_(num_feature), is_multi_val_(is_multi_val > 0), is_sparse_(false) {
CHECK_EQ(static_cast<int>(bin_mappers->size()), num_feature);
// use bin at zero to store most_freq_bin
num_total_bin_ = 1;
bin_offsets_.emplace_back(num_total_bin_);
auto& ref_bin_mappers = *bin_mappers;
double sum_sparse_rate = 0.0f;
for (int i = 0; i < num_feature_; ++i) {
bin_mappers_.emplace_back(ref_bin_mappers[i].release());
sum_sparse_rate += bin_mappers_.back()->sparse_rate();
}
sum_sparse_rate /= num_feature_;
int offset = 1;
is_dense_multi_val_ = false;
if (sum_sparse_rate < MultiValBin::multi_val_bin_sparse_threshold && is_multi_val_) {
// use dense multi val bin
offset = 0;
is_dense_multi_val_ = true;
}
// use bin at zero to store most_freq_bin only when not using dense multi val bin
num_total_bin_ = offset;
bin_offsets_.emplace_back(num_total_bin_);
for (int i = 0; i < num_feature_; ++i) {
auto num_bin = bin_mappers_[i]->num_bin();
if (bin_mappers_[i]->GetMostFreqBin() == 0) {
num_bin -= 1;
num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
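As a worked example of the offset logic above, take three hypothetical features with num_bin() = 10, 20 and 30, each with GetMostFreqBin() == 0. The sparse path (offset = 1) reserves bin zero at the group level and yields bin_offsets_ = {1, 10, 29, 58}; the dense path (offset = 0) yields {0, 10, 30, 60}. A self-contained sketch reproducing that computation:

#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> num_bins = {10, 20, 30};  // hypothetical bin counts
  for (int offset : {1, 0}) {  // 1: sparse multi-val bin, 0: dense multi-val bin
    int num_total_bin = offset;
    std::vector<int> bin_offsets = {num_total_bin};
    for (int num_bin : num_bins) {
      // most_freq_bin == 0 for every feature, so the group-level zero bin
      // absorbs it in the sparse case.
      num_bin -= offset;
      num_total_bin += num_bin;
      bin_offsets.push_back(num_total_bin);
    }
    for (int o : bin_offsets) std::printf("%d ", o);
    std::printf("\n");  // prints "1 10 29 58" then "0 10 30 60"
  }
}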
@@ -54,6 +70,7 @@
FeatureGroup(const FeatureGroup& other, int num_data) {
num_feature_ = other.num_feature_;
is_multi_val_ = other.is_multi_val_;
is_dense_multi_val_ = other.is_dense_multi_val_;
is_sparse_ = other.is_sparse_;
num_total_bin_ = other.num_total_bin_;
bin_offsets_ = other.bin_offsets_;
@@ -70,6 +87,7 @@
CHECK_EQ(static_cast<int>(bin_mappers->size()), 1);
// use bin at zero to store default_bin
num_total_bin_ = 1;
is_dense_multi_val_ = false;
bin_offsets_.emplace_back(num_total_bin_);
auto& ref_bin_mappers = *bin_mappers;
for (int i = 0; i < num_feature_; ++i) {
@@ -96,6 +114,8 @@
// get is_sparse
is_multi_val_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_multi_val_));
is_dense_multi_val_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_dense_multi_val_));
is_sparse_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_sparse_));
num_feature_ = *(reinterpret_cast<const int*>(memory_ptr));
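The deserializer above and the SaveBinaryToFile/SizesInByte pair later in this diff must agree on the padded width of each field, which is what VirtualFileWriter::AlignedSize provides. A minimal sketch of that idiom; the 8-byte alignment is an assumption for illustration, not taken from the diff:

#include <cstddef>
#include <cstdio>

// Pad each serialized field to a fixed alignment so the reader can advance
// its pointer by exactly the amount the writer emitted.
constexpr size_t AlignedSize(size_t bytes, size_t alignment = 8) {
  return (bytes + alignment - 1) / alignment * alignment;
}

int main() {
  std::printf("%zu %zu\n", AlignedSize(sizeof(bool)), AlignedSize(sizeof(int)));
  // prints "8 8": a 1-byte bool and a 4-byte int each occupy one aligned slot
}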
@@ -193,15 +213,41 @@
void AddFeaturesFrom(const FeatureGroup* other) {
CHECK(is_multi_val_);
CHECK(other->is_multi_val_);
// every time when new features are added, we need to reconsider sparse or dense
double sum_sparse_rate = 0.0f;
for (int i = 0; i < num_feature_; ++i) {
sum_sparse_rate += bin_mappers_[i]->sparse_rate();
}
for (int i = 0; i < other->num_feature_; ++i) {
sum_sparse_rate += other->bin_mappers_[i]->sparse_rate();
}
sum_sparse_rate /= (num_feature_ + other->num_feature_);
int offset = 1;
is_dense_multi_val_ = false;
if (sum_sparse_rate < MultiValBin::multi_val_bin_sparse_threshold && is_multi_val_) {
// use dense multi val bin
offset = 0;
is_dense_multi_val_ = true;
}
bin_offsets_.clear();
num_total_bin_ = offset;
bin_offsets_.emplace_back(num_total_bin_);
for (int i = 0; i < num_feature_; ++i) {
auto num_bin = bin_mappers_[i]->num_bin();
if (bin_mappers_[i]->GetMostFreqBin() == 0) {
num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
}
for (int i = 0; i < other->num_feature_; ++i) {
const auto& other_bin_mapper = other->bin_mappers_[i];
bin_mappers_.emplace_back(new BinMapper(*other_bin_mapper));
auto num_bin = other_bin_mapper->num_bin();
if (other_bin_mapper->GetMostFreqBin() == 0) {
num_bin -= 1;
num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
multi_bin_data_.emplace_back(other->multi_bin_data_[i]->Clone());
}
num_feature_ += other->num_feature_;
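A worked example of the recomputation above, with hypothetical sparse rates: a group holding features with rates 0.4 and 0.3 averages 0.35 and stays sparse; after AddFeaturesFrom brings in a fully dense feature (rate 0.0), the average drops to about 0.233, below the 0.25 threshold, so the merged group switches to the dense multi-value representation:

#include <cstdio>

int main() {
  const double kThreshold = 0.25;  // MultiValBin::multi_val_bin_sparse_threshold
  const double own_rates[] = {0.4, 0.3};  // hypothetical existing features
  const double other_rates[] = {0.0};     // hypothetical incoming dense feature
  double sum = 0.0;
  int n = 0;
  for (double r : own_rates) { sum += r; ++n; }
  for (double r : other_rates) { sum += r; ++n; }
  const double avg = sum / n;  // (0.4 + 0.3 + 0.0) / 3 = 0.2333...
  std::printf("avg = %.4f -> %s multi-val bin\n", avg,
              avg < kThreshold ? "dense" : "sparse");  // dense
}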
@@ -321,6 +367,7 @@
*/
void SaveBinaryToFile(const VirtualFileWriter* writer) const {
writer->AlignedWrite(&is_multi_val_, sizeof(is_multi_val_));
writer->AlignedWrite(&is_dense_multi_val_, sizeof(is_dense_multi_val_));
writer->AlignedWrite(&is_sparse_, sizeof(is_sparse_));
writer->AlignedWrite(&num_feature_, sizeof(num_feature_));
for (int i = 0; i < num_feature_; ++i) {
@@ -340,6 +387,7 @@
*/
size_t SizesInByte() const {
size_t ret = VirtualFileWriter::AlignedSize(sizeof(is_multi_val_)) +
VirtualFileWriter::AlignedSize(sizeof(is_dense_multi_val_)) +
VirtualFileWriter::AlignedSize(sizeof(is_sparse_)) +
VirtualFileWriter::AlignedSize(sizeof(num_feature_));
for (int i = 0; i < num_feature_; ++i) {
@@ -362,6 +410,7 @@
FeatureGroup(const FeatureGroup& other) {
num_feature_ = other.num_feature_;
is_multi_val_ = other.is_multi_val_;
is_dense_multi_val_ = other.is_dense_multi_val_;
is_sparse_ = other.is_sparse_;
num_total_bin_ = other.num_total_bin_;
bin_offsets_ = other.bin_offsets_;
@@ -420,6 +469,7 @@
std::vector<std::unique_ptr<Bin>> multi_bin_data_;
/*! \brief True if this feature is sparse */
bool is_multi_val_;
bool is_dense_multi_val_;
bool is_sparse_;
int num_total_bin_;
};