Optimization of row-wise histogram construction (#3522)
* store without offset in multi_val_dense_bin

* fix offset bug

* add comment for offset

* add comment for bin type selection

* faster operations for offset

* keep most freq bin in histogram for multi val dense

* use original feature iterators

* consider 9 cases (3 x 3) for multi val bin construction

* fix dense bin setting

* fix bin data in multi val group

* fix offset of the first feature histogram

* use float hist buf

* avx in histogram construction

* use avx for hist construction without prefetch

* vectorize bin extraction

* use only 128 vec

* use avx2

* use vectorization for sparse row wise

* add bit size for multi val dense bin

* float with no vectorization

* change multithreading strategy to dynamic

* remove intrinsic header

* fix dense multi val col copy

* remove bit size

* use large enough block size when the bin number is large

* calc min block size by sparsity

* rescale gradients

* rollback gradients scaling

* single precision histogram buffer as an option

* add float hist buffer with thread buffer

* fix setting zero in hist data

* fix hist begin pointer in tree learners

* remove debug logs

* remove omp simd

* update Makevars of R-package

* fix feature group binary storing

* two row wise for double hist buffer

* add subfeature for two row wise

* remove useless code and fix two row wise

* refactor code

* grouping the dense feature groups can get sparse multi val bin

* clean format problems

* one thread for two blocks in sep row wise

* use ordered gradients for sep row wise

* fix grad ptr

* ordered grad with combined block for sep row wise

* fix block threading

* use the same min block size

* rollback share min block size

* remove logs

* Update src/io/dataset.cpp

Co-authored-by: Guolin Ke <guolin.ke@outlook.com>

* fix parameter description

* remove sep_row_wise

* remove check codes

* add check for empty multi val bin

* fix lint error

* rollback changes in config.h

* Apply suggestions from code review

Co-authored-by: Ubuntu <shiyu@gbdt-04.ren3kv4wanvufliwrpy4k03lsf.xx.internal.cloudapp.net>
Co-authored-by: Guolin Ke <guolin.ke@outlook.com>
3 people authored Nov 13, 2020
1 parent 1bc2793 commit 0655d67
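Taken together, the commits above implement row-wise histogram construction over a multi-value bin: instead of walking features column by column, one pass over the rows accumulates every feature's (gradient, hessian) contribution into a shared histogram. A minimal scalar sketch of that access pattern, with hypothetical names (the committed kernels are blocked, multithreaded, optionally single-precision, and vectorized):

#include <cstddef>
#include <cstdint>
#include <vector>

typedef double hist_t;  // LightGBM's histogram entry type
typedef float score_t;  // LightGBM's gradient/hessian type

// Sketch only: dense multi-val layout, one uint8_t bin per feature per row.
void ConstructHistogramRowWise(const uint8_t* data, int num_data,
                               int num_feature,
                               const std::vector<uint32_t>& offsets,
                               const score_t* gradients,
                               const score_t* hessians,
                               hist_t* hist /* 2 * num_total_bin entries */) {
  for (int i = 0; i < num_data; ++i) {
    const uint8_t* row = data + static_cast<std::size_t>(i) * num_feature;
    for (int j = 0; j < num_feature; ++j) {
      // offsets[j] maps feature j's local bin index to its slot in the
      // shared histogram; gradient and hessian are interleaved per bin.
      const uint32_t bin = offsets[j] + row[j];
      hist[2 * bin] += gradients[i];
      hist[2 * bin + 1] += hessians[i];
    }
  }
}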
Showing 19 changed files with 901 additions and 454 deletions.
1 change: 1 addition & 0 deletions R-package/src/Makevars.in
@@ -36,6 +36,7 @@ OBJECTS = \
io/json11.o \
io/metadata.o \
io/parser.o \
+ io/train_share_states.o \
io/tree.o \
metric/dcg_calculator.o \
metric/metric.o \
1 change: 1 addition & 0 deletions R-package/src/Makevars.win.in
@@ -37,6 +37,7 @@ OBJECTS = \
io/json11.o \
io/metadata.o \
io/parser.o \
+ io/train_share_states.o \
io/tree.o \
metric/dcg_calculator.o \
metric/metric.o \
12 changes: 8 additions & 4 deletions include/LightGBM/bin.h
@@ -399,6 +399,7 @@ class MultiValBin {

virtual double num_element_per_row() const = 0;

+ virtual const std::vector<uint32_t>& offsets() const = 0;

virtual void PushOneRow(int tid, data_size_t idx, const std::vector<uint32_t>& values) = 0;

@@ -408,7 +409,8 @@

virtual MultiValBin* CreateLike(data_size_t num_data, int num_bin,
int num_feature,
- double estimate_element_per_row) const = 0;
+ double estimate_element_per_row,
+ const std::vector<uint32_t>& offsets) const = 0;

virtual void CopySubcol(const MultiValBin* full_bin,
const std::vector<int>& used_feature_index,
@@ -417,7 +419,7 @@
const std::vector<uint32_t>& delta) = 0;

virtual void ReSize(data_size_t num_data, int num_bin, int num_feature,
- double estimate_element_per_row) = 0;
+ double estimate_element_per_row, const std::vector<uint32_t>& offsets) = 0;

virtual void CopySubrowAndSubcol(
const MultiValBin* full_bin, const data_size_t* used_indices,
@@ -447,13 +449,15 @@
virtual bool IsSparse() = 0;

static MultiValBin* CreateMultiValBin(data_size_t num_data, int num_bin,
- int num_feature, double sparse_rate);
+ int num_feature, double sparse_rate, const std::vector<uint32_t>& offsets);

static MultiValBin* CreateMultiValDenseBin(data_size_t num_data, int num_bin,
- int num_feature);
+ int num_feature, const std::vector<uint32_t>& offsets);

static MultiValBin* CreateMultiValSparseBin(data_size_t num_data, int num_bin, double estimate_element_per_row);

+ static constexpr double multi_val_bin_sparse_threshold = 0.25f;

virtual MultiValBin* Clone() = 0;
};
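The new multi_val_bin_sparse_threshold constant and the offsets parameters above suggest how the factory picks a representation. A minimal sketch of that dispatch, under the assumption that the average sparse rate decides between the two layouts (hypothetical function name; the factory's actual body is not shown in this diff):

#include <vector>
#include <LightGBM/bin.h>

namespace LightGBM {
// Sketch only: choose CSR-style sparse storage when most entries are the
// most frequent bin, otherwise a fixed-width dense row layout.
MultiValBin* CreateMultiValBinSketch(data_size_t num_data, int num_bin,
                                     int num_feature, double sparse_rate,
                                     const std::vector<uint32_t>& offsets) {
  if (sparse_rate >= MultiValBin::multi_val_bin_sparse_threshold) {
    // Expected number of non-most-frequent entries per row.
    const double estimate_element_per_row = (1.0 - sparse_rate) * num_feature;
    return MultiValBin::CreateMultiValSparseBin(num_data, num_bin,
                                                estimate_element_per_row);
  }
  return MultiValBin::CreateMultiValDenseBin(num_data, num_bin, num_feature,
                                             offsets);
}
}  // namespace LightGBM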

58 changes: 4 additions & 54 deletions include/LightGBM/dataset.h
@@ -8,6 +8,7 @@
#include <LightGBM/config.h>
#include <LightGBM/feature_group.h>
#include <LightGBM/meta.h>
+ #include <LightGBM/train_share_states.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/text_reader.h>
@@ -275,57 +276,6 @@ class Parser {
static Parser* CreateParser(const char* filename, bool header, int num_features, int label_idx);
};

- struct TrainingShareStates {
-   int num_threads = 0;
-   bool is_colwise = true;
-   bool is_use_subcol = false;
-   bool is_use_subrow = false;
-   bool is_subrow_copied = false;
-   bool is_constant_hessian = true;
-   const data_size_t* bagging_use_indices;
-   data_size_t bagging_indices_cnt;
-   int num_bin_aligned;
-   std::unique_ptr<MultiValBin> multi_val_bin;
-   std::unique_ptr<MultiValBin> multi_val_bin_subset;
-   std::vector<uint32_t> hist_move_src;
-   std::vector<uint32_t> hist_move_dest;
-   std::vector<uint32_t> hist_move_size;
-   std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>
-       hist_buf;
-
-   void SetMultiValBin(MultiValBin* bin) {
-     num_threads = OMP_NUM_THREADS();
-     if (bin == nullptr) {
-       return;
-     }
-     multi_val_bin.reset(bin);
-     num_bin_aligned =
-         (bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
-     size_t new_size = static_cast<size_t>(num_bin_aligned) * 2 * num_threads;
-     if (new_size > hist_buf.size()) {
-       hist_buf.resize(static_cast<size_t>(num_bin_aligned) * 2 * num_threads);
-     }
-   }
-
-   hist_t* TempBuf() {
-     if (!is_use_subcol) {
-       return nullptr;
-     }
-     return hist_buf.data() + hist_buf.size() - num_bin_aligned * 2;
-   }
-
-   void HistMove(const hist_t* src, hist_t* dest) {
-     if (!is_use_subcol) {
-       return;
-     }
- #pragma omp parallel for schedule(static)
-     for (int i = 0; i < static_cast<int>(hist_move_src.size()); ++i) {
-       std::copy_n(src + hist_move_src[i], hist_move_size[i],
-                   dest + hist_move_dest[i]);
-     }
-   }
- };
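The removed struct evidently moves to the new train_share_states translation unit added in the Makevars and included above. For reference, the buffer sizing in its SetMultiValBin gives each thread a private histogram of num_bin_aligned bins, where each bin holds an interleaved (gradient, hessian) pair, hence the factor of 2. A worked example of that arithmetic, assuming kAlignedSize == 32 (its usual value in LightGBM; an assumption here) and 8 threads:

#include <cstddef>
#include <iostream>

int main() {
  const int kAlignedSize = 32;  // assumed value of LightGBM's kAlignedSize
  const int num_bin = 100;
  const int num_threads = 8;
  // Round the bin count up to a multiple of kAlignedSize.
  const int num_bin_aligned =
      (num_bin + kAlignedSize - 1) / kAlignedSize * kAlignedSize;  // 128
  // Two hist_t entries (gradient, hessian) per bin, one block per thread.
  const std::size_t buf_size =
      static_cast<std::size_t>(num_bin_aligned) * 2 * num_threads;  // 2048
  std::cout << num_bin_aligned << " " << buf_size << "\n";
  return 0;
}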

/*! \brief The main class of data set,
* which are used to training or validation
*/
@@ -444,14 +394,14 @@ class Dataset {

void CopySubrow(const Dataset* fullset, const data_size_t* used_indices, data_size_t num_used_indices, bool need_meta_data);

- MultiValBin* GetMultiBinFromSparseFeatures() const;
+ MultiValBin* GetMultiBinFromSparseFeatures(const std::vector<uint32_t>& offsets) const;

- MultiValBin* GetMultiBinFromAllFeatures() const;
+ MultiValBin* GetMultiBinFromAllFeatures(const std::vector<uint32_t>& offsets) const;

TrainingShareStates* GetShareStates(
score_t* gradients, score_t* hessians,
const std::vector<int8_t>& is_feature_used, bool is_constant_hessian,
- bool force_colwise, bool force_rowwise) const;
+ bool force_col_wise, bool force_row_wise) const;

LIGHTGBM_EXPORT void FinishLoad();

62 changes: 56 additions & 6 deletions include/LightGBM/feature_group.h
@@ -18,12 +18,16 @@ namespace LightGBM {

class Dataset;
class DatasetLoader;
+ class TrainingShareStates;
+ class MultiValBinWrapper;
/*! \brief Using to store data and providing some operations on one feature
* group*/
class FeatureGroup {
public:
friend Dataset;
friend DatasetLoader;
+ friend TrainingShareStates;
+ friend MultiValBinWrapper;
/*!
* \brief Constructor
* \param num_feature number of features of this group
@@ -35,15 +39,27 @@
std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
data_size_t num_data) : num_feature_(num_feature), is_multi_val_(is_multi_val > 0), is_sparse_(false) {
CHECK_EQ(static_cast<int>(bin_mappers->size()), num_feature);
- // use bin at zero to store most_freq_bin
- num_total_bin_ = 1;
- bin_offsets_.emplace_back(num_total_bin_);
auto& ref_bin_mappers = *bin_mappers;
+ double sum_sparse_rate = 0.0f;
for (int i = 0; i < num_feature_; ++i) {
bin_mappers_.emplace_back(ref_bin_mappers[i].release());
+ sum_sparse_rate += bin_mappers_.back()->sparse_rate();
}
+ sum_sparse_rate /= num_feature_;
+ int offset = 1;
+ is_dense_multi_val_ = false;
+ if (sum_sparse_rate < MultiValBin::multi_val_bin_sparse_threshold && is_multi_val_) {
+   // use dense multi val bin
+   offset = 0;
+   is_dense_multi_val_ = true;
+ }
+ // use bin at zero to store most_freq_bin only when not using dense multi val bin
+ num_total_bin_ = offset;
+ bin_offsets_.emplace_back(num_total_bin_);
for (int i = 0; i < num_feature_; ++i) {
auto num_bin = bin_mappers_[i]->num_bin();
if (bin_mappers_[i]->GetMostFreqBin() == 0) {
- num_bin -= 1;
+ num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
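A worked example of the offset logic above, with hypothetical bin counts: three features with num_bin = {4, 3, 5}, each with GetMostFreqBin() == 0. With offset = 1 (sparse case) bin 0 is shared for the most frequent bin and each feature drops one slot; with offset = 0 (dense multi-val case) every feature keeps all of its bins:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const std::vector<int> num_bins = {4, 3, 5};  // hypothetical features
  for (int offset : {1, 0}) {  // 1: sparse multi-val, 0: dense multi-val
    int num_total_bin = offset;
    std::vector<uint32_t> bin_offsets = {static_cast<uint32_t>(num_total_bin)};
    for (int nb : num_bins) {
      // GetMostFreqBin() == 0 for every feature in this example.
      num_total_bin += nb - offset;
      bin_offsets.push_back(num_total_bin);
    }
    // offset = 1 -> bin_offsets {1, 4, 6, 10}, num_total_bin = 10
    // offset = 0 -> bin_offsets {0, 4, 7, 12}, num_total_bin = 12
    std::cout << "offset=" << offset << " total=" << num_total_bin << "\n";
  }
  return 0;
}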
@@ -54,6 +70,7 @@
FeatureGroup(const FeatureGroup& other, int num_data) {
num_feature_ = other.num_feature_;
is_multi_val_ = other.is_multi_val_;
+ is_dense_multi_val_ = other.is_dense_multi_val_;
is_sparse_ = other.is_sparse_;
num_total_bin_ = other.num_total_bin_;
bin_offsets_ = other.bin_offsets_;
@@ -70,6 +87,7 @@
CHECK_EQ(static_cast<int>(bin_mappers->size()), 1);
// use bin at zero to store default_bin
num_total_bin_ = 1;
+ is_dense_multi_val_ = false;
bin_offsets_.emplace_back(num_total_bin_);
auto& ref_bin_mappers = *bin_mappers;
for (int i = 0; i < num_feature_; ++i) {
@@ -96,6 +114,8 @@
// get is_sparse
is_multi_val_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_multi_val_));
+ is_dense_multi_val_ = *(reinterpret_cast<const bool*>(memory_ptr));
+ memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_dense_multi_val_));
is_sparse_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_sparse_));
num_feature_ = *(reinterpret_cast<const int*>(memory_ptr));
@@ -193,15 +213,41 @@
void AddFeaturesFrom(const FeatureGroup* other) {
CHECK(is_multi_val_);
CHECK(other->is_multi_val_);
+ // every time when new features are added, we need to reconsider sparse or dense
+ double sum_sparse_rate = 0.0f;
+ for (int i = 0; i < num_feature_; ++i) {
+   sum_sparse_rate += bin_mappers_[i]->sparse_rate();
+ }
+ for (int i = 0; i < other->num_feature_; ++i) {
+   sum_sparse_rate += other->bin_mappers_[i]->sparse_rate();
+ }
+ sum_sparse_rate /= (num_feature_ + other->num_feature_);
+ int offset = 1;
+ is_dense_multi_val_ = false;
+ if (sum_sparse_rate < MultiValBin::multi_val_bin_sparse_threshold && is_multi_val_) {
+   // use dense multi val bin
+   offset = 0;
+   is_dense_multi_val_ = true;
+ }
+ bin_offsets_.clear();
+ num_total_bin_ = offset;
+ bin_offsets_.emplace_back(num_total_bin_);
+ for (int i = 0; i < num_feature_; ++i) {
+   auto num_bin = bin_mappers_[i]->num_bin();
+   if (bin_mappers_[i]->GetMostFreqBin() == 0) {
+     num_bin -= offset;
+   }
+   num_total_bin_ += num_bin;
+   bin_offsets_.emplace_back(num_total_bin_);
+ }
for (int i = 0; i < other->num_feature_; ++i) {
const auto& other_bin_mapper = other->bin_mappers_[i];
bin_mappers_.emplace_back(new BinMapper(*other_bin_mapper));
auto num_bin = other_bin_mapper->num_bin();
if (other_bin_mapper->GetMostFreqBin() == 0) {
- num_bin -= 1;
+ num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
multi_bin_data_.emplace_back(other->multi_bin_data_[i]->Clone());
}
num_feature_ += other->num_feature_;
@@ -321,6 +367,7 @@
*/
void SaveBinaryToFile(const VirtualFileWriter* writer) const {
writer->AlignedWrite(&is_multi_val_, sizeof(is_multi_val_));
+ writer->AlignedWrite(&is_dense_multi_val_, sizeof(is_dense_multi_val_));
writer->AlignedWrite(&is_sparse_, sizeof(is_sparse_));
writer->AlignedWrite(&num_feature_, sizeof(num_feature_));
for (int i = 0; i < num_feature_; ++i) {
@@ -340,6 +387,7 @@
*/
size_t SizesInByte() const {
size_t ret = VirtualFileWriter::AlignedSize(sizeof(is_multi_val_)) +
+ VirtualFileWriter::AlignedSize(sizeof(is_dense_multi_val_)) +
VirtualFileWriter::AlignedSize(sizeof(is_sparse_)) +
VirtualFileWriter::AlignedSize(sizeof(num_feature_));
for (int i = 0; i < num_feature_; ++i) {
@@ -362,6 +410,7 @@
FeatureGroup(const FeatureGroup& other) {
num_feature_ = other.num_feature_;
is_multi_val_ = other.is_multi_val_;
+ is_dense_multi_val_ = other.is_dense_multi_val_;
is_sparse_ = other.is_sparse_;
num_total_bin_ = other.num_total_bin_;
bin_offsets_ = other.bin_offsets_;
@@ -420,6 +469,7 @@
std::vector<std::unique_ptr<Bin>> multi_bin_data_;
/*! \brief True if this feature is sparse */
bool is_multi_val_;
+ bool is_dense_multi_val_;
bool is_sparse_;
int num_total_bin_;
};
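The serialization changes above each add one AlignedWrite/AlignedSize entry for is_dense_multi_val_. A small sketch of the alignment arithmetic, assuming VirtualFileWriter pads each field to 8 bytes (an assumption; the library's actual constant is not shown in this diff):

#include <cstddef>

// Hypothetical stand-in for VirtualFileWriter::AlignedSize.
constexpr std::size_t AlignedSizeSketch(std::size_t bytes,
                                        std::size_t align = 8) {
  return (bytes + align - 1) / align * align;
}

// Under this assumption each bool flag occupies a full aligned slot, so the
// new is_dense_multi_val_ field grows the serialized header by one slot,
// matching the new AlignedWrite call above.
static_assert(AlignedSizeSketch(sizeof(bool)) == 8,
              "one aligned slot per bool flag");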
(diffs for the remaining 14 changed files are not shown here)
