From 0655d67cc1ee28d536a773a6c5d978d38a0e56c1 Mon Sep 17 00:00:00 2001 From: shiyu1994 Date: Fri, 13 Nov 2020 23:26:38 +0800 Subject: [PATCH] Optimization of row-wise histogram construction (#3522) * store without offset in multi_val_dense_bin * fix offset bug * add comment for offset * add comment for bin type selection * faster operations for offset * keep most freq bin in histogram for multi val dense * use original feature iterators * consider 9 cases (3 x 3) for multi val bin construction * fix dense bin setting * fix bin data in multi val group * fix offset of the first feature histogram * use float hist buf * avx in histogram construction * use avx for hist construction without prefetch * vectorize bin extraction * use only 128 vec * use avx2 * use vectorization for sparse row wise * add bit size for multi val dense bin * float with no vectorization * change multithreading strategy to dynamic * remove intrinsic header * fix dense multi val col copy * remove bit size * use large enough block size when the bin number is large * calc min block size by sparsity * rescale gradients * rollback gradients scaling * single precision histogram buffer as an option * add float hist buffer with thread buffer * fix setting zero in hist data * fix hist begin pointer in tree learners * remove debug logs * remove omp simd * update Makevars of R-package * fix feature group binary storing * two row wise for double hist buffer * add subfeature for two row wise * remove useless code and fix two row wise * refactor code * grouping the dense feature groups can get sparse multi val bin * clean format problems * one thread for two blocks in sep row wise * use ordered gradients for sep row wise * fix grad ptr * ordered grad with combined block for sep row wise * fix block threading * use the same min block size * rollback share min block size * remove logs * Update src/io/dataset.cpp Co-authored-by: Guolin Ke * fix parameter description * remove sep_row_wise * remove check codes * 
add check for empty multi val bin * fix lint error * rollback changes in config.h * Apply suggestions from code review Co-authored-by: Ubuntu Co-authored-by: Guolin Ke --- R-package/src/Makevars.in | 1 + R-package/src/Makevars.win.in | 1 + include/LightGBM/bin.h | 12 +- include/LightGBM/dataset.h | 58 +-- include/LightGBM/feature_group.h | 62 ++- include/LightGBM/train_share_states.h | 227 ++++++++++ include/LightGBM/utils/threading.h | 18 + src/io/bin.cpp | 27 +- src/io/dataset.cpp | 364 ++++------------ src/io/multi_val_dense_bin.hpp | 73 ++-- src/io/multi_val_sparse_bin.hpp | 37 +- src/io/train_share_states.cpp | 407 ++++++++++++++++++ .../data_parallel_tree_learner.cpp | 2 +- src/treelearner/feature_histogram.hpp | 36 +- src/treelearner/serial_tree_learner.cpp | 15 +- src/treelearner/serial_tree_learner.h | 6 +- .../voting_parallel_tree_learner.cpp | 5 +- windows/LightGBM.vcxproj | 1 + windows/LightGBM.vcxproj.filters | 3 + 19 files changed, 901 insertions(+), 454 deletions(-) create mode 100644 include/LightGBM/train_share_states.h create mode 100644 src/io/train_share_states.cpp diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 934a77337324..3ff6cbda46c8 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -36,6 +36,7 @@ OBJECTS = \ io/json11.o \ io/metadata.o \ io/parser.o \ + io/train_share_states.o \ io/tree.o \ metric/dcg_calculator.o \ metric/metric.o \ diff --git a/R-package/src/Makevars.win.in b/R-package/src/Makevars.win.in index c8d1be11f4bc..49835ad8d1c7 100644 --- a/R-package/src/Makevars.win.in +++ b/R-package/src/Makevars.win.in @@ -37,6 +37,7 @@ OBJECTS = \ io/json11.o \ io/metadata.o \ io/parser.o \ + io/train_share_states.o \ io/tree.o \ metric/dcg_calculator.o \ metric/metric.o \ diff --git a/include/LightGBM/bin.h b/include/LightGBM/bin.h index 987279e47716..e7ba45a83aa1 100644 --- a/include/LightGBM/bin.h +++ b/include/LightGBM/bin.h @@ -399,6 +399,7 @@ class MultiValBin { virtual double 
num_element_per_row() const = 0; + virtual const std::vector& offsets() const = 0; virtual void PushOneRow(int tid, data_size_t idx, const std::vector& values) = 0; @@ -408,7 +409,8 @@ class MultiValBin { virtual MultiValBin* CreateLike(data_size_t num_data, int num_bin, int num_feature, - double estimate_element_per_row) const = 0; + double estimate_element_per_row, + const std::vector& offsets) const = 0; virtual void CopySubcol(const MultiValBin* full_bin, const std::vector& used_feature_index, @@ -417,7 +419,7 @@ class MultiValBin { const std::vector& delta) = 0; virtual void ReSize(data_size_t num_data, int num_bin, int num_feature, - double estimate_element_per_row) = 0; + double estimate_element_per_row, const std::vector& offsets) = 0; virtual void CopySubrowAndSubcol( const MultiValBin* full_bin, const data_size_t* used_indices, @@ -447,13 +449,15 @@ class MultiValBin { virtual bool IsSparse() = 0; static MultiValBin* CreateMultiValBin(data_size_t num_data, int num_bin, - int num_feature, double sparse_rate); + int num_feature, double sparse_rate, const std::vector& offsets); static MultiValBin* CreateMultiValDenseBin(data_size_t num_data, int num_bin, - int num_feature); + int num_feature, const std::vector& offsets); static MultiValBin* CreateMultiValSparseBin(data_size_t num_data, int num_bin, double estimate_element_per_row); + static constexpr double multi_val_bin_sparse_threshold = 0.25f; + virtual MultiValBin* Clone() = 0; }; diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index 2c6d74caa1d6..44c4196544b6 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -275,57 +276,6 @@ class Parser { static Parser* CreateParser(const char* filename, bool header, int num_features, int label_idx); }; -struct TrainingShareStates { - int num_threads = 0; - bool is_colwise = true; - bool is_use_subcol = false; - bool is_use_subrow = false; - 
bool is_subrow_copied = false; - bool is_constant_hessian = true; - const data_size_t* bagging_use_indices; - data_size_t bagging_indices_cnt; - int num_bin_aligned; - std::unique_ptr multi_val_bin; - std::unique_ptr multi_val_bin_subset; - std::vector hist_move_src; - std::vector hist_move_dest; - std::vector hist_move_size; - std::vector> - hist_buf; - - void SetMultiValBin(MultiValBin* bin) { - num_threads = OMP_NUM_THREADS(); - if (bin == nullptr) { - return; - } - multi_val_bin.reset(bin); - num_bin_aligned = - (bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize; - size_t new_size = static_cast(num_bin_aligned) * 2 * num_threads; - if (new_size > hist_buf.size()) { - hist_buf.resize(static_cast(num_bin_aligned) * 2 * num_threads); - } - } - - hist_t* TempBuf() { - if (!is_use_subcol) { - return nullptr; - } - return hist_buf.data() + hist_buf.size() - num_bin_aligned * 2; - } - - void HistMove(const hist_t* src, hist_t* dest) { - if (!is_use_subcol) { - return; - } -#pragma omp parallel for schedule(static) - for (int i = 0; i < static_cast(hist_move_src.size()); ++i) { - std::copy_n(src + hist_move_src[i], hist_move_size[i], - dest + hist_move_dest[i]); - } - } -}; - /*! 
\brief The main class of data set, * which are used to training or validation */ @@ -444,14 +394,14 @@ class Dataset { void CopySubrow(const Dataset* fullset, const data_size_t* used_indices, data_size_t num_used_indices, bool need_meta_data); - MultiValBin* GetMultiBinFromSparseFeatures() const; + MultiValBin* GetMultiBinFromSparseFeatures(const std::vector& offsets) const; - MultiValBin* GetMultiBinFromAllFeatures() const; + MultiValBin* GetMultiBinFromAllFeatures(const std::vector& offsets) const; TrainingShareStates* GetShareStates( score_t* gradients, score_t* hessians, const std::vector& is_feature_used, bool is_constant_hessian, - bool force_colwise, bool force_rowwise) const; + bool force_col_wise, bool force_row_wise) const; LIGHTGBM_EXPORT void FinishLoad(); diff --git a/include/LightGBM/feature_group.h b/include/LightGBM/feature_group.h index e39d6fbbf779..3fbf53856813 100644 --- a/include/LightGBM/feature_group.h +++ b/include/LightGBM/feature_group.h @@ -18,12 +18,16 @@ namespace LightGBM { class Dataset; class DatasetLoader; +class TrainingShareStates; +class MultiValBinWrapper; /*! \brief Using to store data and providing some operations on one feature * group*/ class FeatureGroup { public: friend Dataset; friend DatasetLoader; + friend TrainingShareStates; + friend MultiValBinWrapper; /*! 
* \brief Constructor * \param num_feature number of features of this group @@ -35,15 +39,27 @@ class FeatureGroup { std::vector>* bin_mappers, data_size_t num_data) : num_feature_(num_feature), is_multi_val_(is_multi_val > 0), is_sparse_(false) { CHECK_EQ(static_cast(bin_mappers->size()), num_feature); - // use bin at zero to store most_freq_bin - num_total_bin_ = 1; - bin_offsets_.emplace_back(num_total_bin_); auto& ref_bin_mappers = *bin_mappers; + double sum_sparse_rate = 0.0f; for (int i = 0; i < num_feature_; ++i) { bin_mappers_.emplace_back(ref_bin_mappers[i].release()); + sum_sparse_rate += bin_mappers_.back()->sparse_rate(); + } + sum_sparse_rate /= num_feature_; + int offset = 1; + is_dense_multi_val_ = false; + if (sum_sparse_rate < MultiValBin::multi_val_bin_sparse_threshold && is_multi_val_) { + // use dense multi val bin + offset = 0; + is_dense_multi_val_ = true; + } + // use bin at zero to store most_freq_bin only when not using dense multi val bin + num_total_bin_ = offset; + bin_offsets_.emplace_back(num_total_bin_); + for (int i = 0; i < num_feature_; ++i) { auto num_bin = bin_mappers_[i]->num_bin(); if (bin_mappers_[i]->GetMostFreqBin() == 0) { - num_bin -= 1; + num_bin -= offset; } num_total_bin_ += num_bin; bin_offsets_.emplace_back(num_total_bin_); @@ -54,6 +70,7 @@ class FeatureGroup { FeatureGroup(const FeatureGroup& other, int num_data) { num_feature_ = other.num_feature_; is_multi_val_ = other.is_multi_val_; + is_dense_multi_val_ = other.is_dense_multi_val_; is_sparse_ = other.is_sparse_; num_total_bin_ = other.num_total_bin_; bin_offsets_ = other.bin_offsets_; @@ -70,6 +87,7 @@ class FeatureGroup { CHECK_EQ(static_cast(bin_mappers->size()), 1); // use bin at zero to store default_bin num_total_bin_ = 1; + is_dense_multi_val_ = false; bin_offsets_.emplace_back(num_total_bin_); auto& ref_bin_mappers = *bin_mappers; for (int i = 0; i < num_feature_; ++i) { @@ -96,6 +114,8 @@ class FeatureGroup { // get is_sparse is_multi_val_ = 
*(reinterpret_cast(memory_ptr)); memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_multi_val_)); + is_dense_multi_val_ = *(reinterpret_cast(memory_ptr)); + memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_dense_multi_val_)); is_sparse_ = *(reinterpret_cast(memory_ptr)); memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_sparse_)); num_feature_ = *(reinterpret_cast(memory_ptr)); @@ -193,15 +213,41 @@ class FeatureGroup { void AddFeaturesFrom(const FeatureGroup* other) { CHECK(is_multi_val_); CHECK(other->is_multi_val_); + // every time when new features are added, we need to reconsider sparse or dense + double sum_sparse_rate = 0.0f; + for (int i = 0; i < num_feature_; ++i) { + sum_sparse_rate += bin_mappers_[i]->sparse_rate(); + } + for (int i = 0; i < other->num_feature_; ++i) { + sum_sparse_rate += other->bin_mappers_[i]->sparse_rate(); + } + sum_sparse_rate /= (num_feature_ + other->num_feature_); + int offset = 1; + is_dense_multi_val_ = false; + if (sum_sparse_rate < MultiValBin::multi_val_bin_sparse_threshold && is_multi_val_) { + // use dense multi val bin + offset = 0; + is_dense_multi_val_ = true; + } + bin_offsets_.clear(); + num_total_bin_ = offset; + bin_offsets_.emplace_back(num_total_bin_); + for (int i = 0; i < num_feature_; ++i) { + auto num_bin = bin_mappers_[i]->num_bin(); + if (bin_mappers_[i]->GetMostFreqBin() == 0) { + num_bin -= offset; + } + num_total_bin_ += num_bin; + bin_offsets_.emplace_back(num_total_bin_); + } for (int i = 0; i < other->num_feature_; ++i) { const auto& other_bin_mapper = other->bin_mappers_[i]; bin_mappers_.emplace_back(new BinMapper(*other_bin_mapper)); auto num_bin = other_bin_mapper->num_bin(); if (other_bin_mapper->GetMostFreqBin() == 0) { - num_bin -= 1; + num_bin -= offset; } num_total_bin_ += num_bin; - bin_offsets_.emplace_back(num_total_bin_); multi_bin_data_.emplace_back(other->multi_bin_data_[i]->Clone()); } num_feature_ += other->num_feature_; @@ -321,6 +367,7 @@ class FeatureGroup { */ void 
SaveBinaryToFile(const VirtualFileWriter* writer) const { writer->AlignedWrite(&is_multi_val_, sizeof(is_multi_val_)); + writer->AlignedWrite(&is_dense_multi_val_, sizeof(is_dense_multi_val_)); writer->AlignedWrite(&is_sparse_, sizeof(is_sparse_)); writer->AlignedWrite(&num_feature_, sizeof(num_feature_)); for (int i = 0; i < num_feature_; ++i) { @@ -340,6 +387,7 @@ class FeatureGroup { */ size_t SizesInByte() const { size_t ret = VirtualFileWriter::AlignedSize(sizeof(is_multi_val_)) + + VirtualFileWriter::AlignedSize(sizeof(is_dense_multi_val_)) + VirtualFileWriter::AlignedSize(sizeof(is_sparse_)) + VirtualFileWriter::AlignedSize(sizeof(num_feature_)); for (int i = 0; i < num_feature_; ++i) { @@ -362,6 +410,7 @@ class FeatureGroup { FeatureGroup(const FeatureGroup& other) { num_feature_ = other.num_feature_; is_multi_val_ = other.is_multi_val_; + is_dense_multi_val_ = other.is_dense_multi_val_; is_sparse_ = other.is_sparse_; num_total_bin_ = other.num_total_bin_; bin_offsets_ = other.bin_offsets_; @@ -420,6 +469,7 @@ class FeatureGroup { std::vector> multi_bin_data_; /*! \brief True if this feature is sparse */ bool is_multi_val_; + bool is_dense_multi_val_; bool is_sparse_; int num_total_bin_; }; diff --git a/include/LightGBM/train_share_states.h b/include/LightGBM/train_share_states.h new file mode 100644 index 000000000000..6dc7db7b7f77 --- /dev/null +++ b/include/LightGBM/train_share_states.h @@ -0,0 +1,227 @@ +/*! + * Copyright (c) 2016 Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See LICENSE file in the project root for license information. 
+ */ +#ifndef LIGHTGBM_TRAIN_SHARE_STATES_H_ +#define LIGHTGBM_TRAIN_SHARE_STATES_H_ + +#include +#include +#include +#include + +#include +#include +#include + +namespace LightGBM { + +class MultiValBinWrapper { + public: + MultiValBinWrapper(MultiValBin* bin, data_size_t num_data, + const std::vector& feature_groups_contained); + + bool IsSparse() { + if (multi_val_bin_ != nullptr) { + return multi_val_bin_->IsSparse(); + } + return false; + } + + void InitTrain(const std::vector& group_feature_start, + const std::vector>& feature_groups, + const std::vector& is_feature_used, + const data_size_t* bagging_use_indices, + data_size_t bagging_indices_cnt); + + void HistMove(const std::vector>& hist_buf); + + void HistMerge(std::vector>* hist_buf); + + void ResizeHistBuf(std::vector>* hist_buf, + MultiValBin* sub_multi_val_bin, + hist_t* origin_hist_data); + + template + void ConstructHistograms(const data_size_t* data_indices, + data_size_t num_data, + const score_t* gradients, + const score_t* hessians, + std::vector>* hist_buf, + hist_t* origin_hist_data) { + const auto cur_multi_val_bin = (is_use_subcol_ || is_use_subrow_) + ? 
multi_val_bin_subset_.get() + : multi_val_bin_.get(); + if (cur_multi_val_bin != nullptr) { + global_timer.Start("Dataset::sparse_bin_histogram"); + n_data_block_ = 1; + data_block_size_ = num_data; + Threading::BlockInfo(num_threads_, num_data, min_block_size_, + max_block_size_, &n_data_block_, &data_block_size_); + ResizeHistBuf(hist_buf, cur_multi_val_bin, origin_hist_data); + OMP_INIT_EX(); + #pragma omp parallel for schedule(static) num_threads(num_threads_) + for (int block_id = 0; block_id < n_data_block_; ++block_id) { + OMP_LOOP_EX_BEGIN(); + data_size_t start = block_id * data_block_size_; + data_size_t end = std::min(start + data_block_size_, num_data); + ConstructHistogramsForBlock( + cur_multi_val_bin, start, end, data_indices, gradients, hessians, + block_id, hist_buf); + OMP_LOOP_EX_END(); + } + OMP_THROW_EX(); + global_timer.Stop("Dataset::sparse_bin_histogram"); + + global_timer.Start("Dataset::sparse_bin_histogram_merge"); + HistMerge(hist_buf); + global_timer.Stop("Dataset::sparse_bin_histogram_merge"); + global_timer.Start("Dataset::sparse_bin_histogram_move"); + HistMove(*hist_buf); + global_timer.Stop("Dataset::sparse_bin_histogram_move"); + } + } + + template + void ConstructHistogramsForBlock(const MultiValBin* sub_multi_val_bin, + data_size_t start, data_size_t end, const data_size_t* data_indices, + const score_t* gradients, const score_t* hessians, int block_id, + std::vector>* hist_buf) { + hist_t* data_ptr = origin_hist_data_; + if (block_id == 0) { + if (is_use_subcol_) { + data_ptr = hist_buf->data() + hist_buf->size() - 2 * static_cast(num_bin_aligned_); + } + } else { + data_ptr = hist_buf->data() + + static_cast(num_bin_aligned_) * (block_id - 1) * 2; + } + std::memset(reinterpret_cast(data_ptr), 0, num_bin_ * kHistBufferEntrySize); + if (USE_INDICES) { + if (ORDERED) { + sub_multi_val_bin->ConstructHistogramOrdered(data_indices, start, end, + gradients, hessians, data_ptr); + } else { + 
sub_multi_val_bin->ConstructHistogram(data_indices, start, end, gradients, + hessians, data_ptr); + } + } else { + sub_multi_val_bin->ConstructHistogram(start, end, gradients, hessians, + data_ptr); + } + } + + void CopyMultiValBinSubset(const std::vector& group_feature_start, + const std::vector>& feature_groups, + const std::vector& is_feature_used, + const data_size_t* bagging_use_indices, + data_size_t bagging_indices_cnt); + + void SetUseSubrow(bool is_use_subrow) { + is_use_subrow_ = is_use_subrow; + } + + void SetSubrowCopied(bool is_subrow_copied) { + is_subrow_copied_ = is_subrow_copied; + } + + private: + bool is_use_subcol_ = false; + bool is_use_subrow_ = false; + bool is_subrow_copied_ = false; + std::unique_ptr multi_val_bin_; + std::unique_ptr multi_val_bin_subset_; + MultiValBin* cur_multi_val_bin_; + std::vector hist_move_src_; + std::vector hist_move_dest_; + std::vector hist_move_size_; + const std::vector feature_groups_contained_; + + int num_threads_; + int max_block_size_; + int num_bin_; + int num_bin_aligned_; + int n_data_block_; + int data_block_size_; + int min_block_size_; + int num_data_; + + hist_t* origin_hist_data_; + + const size_t kHistBufferEntrySize = 2 * sizeof(hist_t); +}; + +struct TrainingShareStates { + int num_threads = 0; + bool is_col_wise = true; + bool is_constant_hessian = true; + const data_size_t* bagging_use_indices; + data_size_t bagging_indices_cnt; + + TrainingShareStates() { + multi_val_bin_wrapper_.reset(nullptr); + } + + uint64_t num_hist_total_bin() { return num_hist_total_bin_; } + + const std::vector& feature_hist_offsets() { return feature_hist_offsets_; } + + bool IsSparseRowwise() { + return (multi_val_bin_wrapper_ != nullptr && multi_val_bin_wrapper_->IsSparse()); + } + + void SetMultiValBin(MultiValBin* bin, data_size_t num_data, + const std::vector>& feature_groups, + bool dense_only, bool sparse_only); + + void CalcBinOffsets(const std::vector>& feature_groups, + std::vector* offsets, bool 
is_col_wise); + + void InitTrain(const std::vector& group_feature_start, + const std::vector>& feature_groups, + const std::vector& is_feature_used) { + if (multi_val_bin_wrapper_ != nullptr) { + multi_val_bin_wrapper_->InitTrain(group_feature_start, + feature_groups, + is_feature_used, + bagging_use_indices, + bagging_indices_cnt); + } + } + + template + void ConstructHistograms(const data_size_t* data_indices, + data_size_t num_data, + const score_t* gradients, + const score_t* hessians, + hist_t* hist_data) { + if (multi_val_bin_wrapper_ != nullptr) { + multi_val_bin_wrapper_->ConstructHistograms( + data_indices, num_data, gradients, hessians, &hist_buf_, hist_data); + } + } + + void SetUseSubrow(bool is_use_subrow) { + if (multi_val_bin_wrapper_ != nullptr) { + multi_val_bin_wrapper_->SetUseSubrow(is_use_subrow); + } + } + + void SetSubrowCopied(bool is_subrow_copied) { + if (multi_val_bin_wrapper_ != nullptr) { + multi_val_bin_wrapper_->SetSubrowCopied(is_subrow_copied); + } + } + + private: + std::vector feature_hist_offsets_; + uint64_t num_hist_total_bin_ = 0; + std::unique_ptr multi_val_bin_wrapper_; + std::vector> hist_buf_; + int num_total_bin_ = 0; + double num_elements_per_row_ = 0.0f; +}; + +} // namespace LightGBM + +#endif // LIGHTGBM_TRAIN_SHARE_STATES_H_ diff --git a/include/LightGBM/utils/threading.h b/include/LightGBM/utils/threading.h index 2936bdd69985..0cdbc323ca7d 100644 --- a/include/LightGBM/utils/threading.h +++ b/include/LightGBM/utils/threading.h @@ -40,6 +40,24 @@ class Threading { } } + template + static inline void BlockInfo(int num_threads, INDEX_T cnt, + INDEX_T min_cnt_per_block, INDEX_T max_cnt_per_block, + int* out_nblock, INDEX_T* block_size) { + CHECK(max_cnt_per_block >= min_cnt_per_block); + *out_nblock = std::min( + num_threads, + static_cast((cnt + min_cnt_per_block - 1) / min_cnt_per_block)); + *out_nblock = std::max( + *out_nblock, + static_cast((cnt + max_cnt_per_block - 1) / max_cnt_per_block)); + if (*out_nblock > 1) 
{ + *block_size = SIZE_ALIGNED((cnt + (*out_nblock) - 1) / (*out_nblock)); + } else { + *block_size = cnt; + } + } + template static inline void BlockInfoForceSize(int num_threads, INDEX_T cnt, INDEX_T min_cnt_per_block, diff --git a/src/io/bin.cpp b/src/io/bin.cpp index c0005007ad9d..9aaaa829ae73 100644 --- a/src/io/bin.cpp +++ b/src/io/bin.cpp @@ -661,26 +661,35 @@ namespace LightGBM { } } - MultiValBin* MultiValBin::CreateMultiValBin(data_size_t num_data, int num_bin, int num_feature, double sparse_rate) { - const double multi_val_bin_sparse_threshold = 0.25f; + MultiValBin* MultiValBin::CreateMultiValBin(data_size_t num_data, int num_bin, int num_feature, + double sparse_rate, const std::vector& offsets) { if (sparse_rate >= multi_val_bin_sparse_threshold) { const double average_element_per_row = (1.0 - sparse_rate) * num_feature; return CreateMultiValSparseBin(num_data, num_bin, average_element_per_row); } else { - return CreateMultiValDenseBin(num_data, num_bin, num_feature); + return CreateMultiValDenseBin(num_data, num_bin, num_feature, offsets); } } MultiValBin* MultiValBin::CreateMultiValDenseBin(data_size_t num_data, int num_bin, - int num_feature) { - if (num_bin <= 256) { - return new MultiValDenseBin(num_data, num_bin, num_feature); - } else if (num_bin <= 65536) { - return new MultiValDenseBin(num_data, num_bin, num_feature); + int num_feature, + const std::vector& offsets) { + // calculate max bin of all features to select the int type in MultiValDenseBin + int max_bin = 0; + for (int i = 0; i < static_cast(offsets.size()) - 1; ++i) { + int feature_bin = offsets[i + 1] - offsets[i]; + if (feature_bin > max_bin) { + max_bin = feature_bin; + } + } + if (max_bin <= 256) { + return new MultiValDenseBin(num_data, num_bin, num_feature, offsets); + } else if (max_bin <= 65536) { + return new MultiValDenseBin(num_data, num_bin, num_feature, offsets); } else { - return new MultiValDenseBin(num_data, num_bin, num_feature); + return new 
MultiValDenseBin(num_data, num_bin, num_feature, offsets); } } diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp index ee8a62f87406..8ae079865332 100644 --- a/src/io/dataset.cpp +++ b/src/io/dataset.cpp @@ -443,6 +443,7 @@ void PushDataToMultiValBin( for (data_size_t i = start; i < end; ++i) { cur_data.clear(); for (size_t j = 0; j < most_freq_bins.size(); ++j) { + // for sparse multi value bin, we store the feature bin values with offset added auto cur_bin = (*iters)[tid][j]->Get(i); if (cur_bin == most_freq_bins[j]) { continue; @@ -465,15 +466,8 @@ void PushDataToMultiValBin( } for (data_size_t i = start; i < end; ++i) { for (size_t j = 0; j < most_freq_bins.size(); ++j) { + // for dense multi value bin, the feature bin values without offsets are used auto cur_bin = (*iters)[tid][j]->Get(i); - if (cur_bin == most_freq_bins[j]) { - cur_bin = 0; - } else { - cur_bin += offsets[j]; - if (most_freq_bins[j] == 0) { - cur_bin -= 1; - } - } cur_data[j] = cur_bin; } ret->PushOneRow(tid, i, cur_data); @@ -482,7 +476,7 @@ void PushDataToMultiValBin( } } -MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { +MultiValBin* Dataset::GetMultiBinFromSparseFeatures(const std::vector& offsets) const { Common::FunctionTimer fun_time("Dataset::GetMultiBinFromSparseFeatures", global_timer); int multi_group_id = -1; @@ -498,7 +492,6 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { if (multi_group_id < 0) { return nullptr; } - const auto& offsets = feature_groups_[multi_group_id]->bin_offsets_; const int num_feature = feature_groups_[multi_group_id]->num_feature_; int num_threads = OMP_NUM_THREADS(); @@ -521,13 +514,13 @@ MultiValBin* Dataset::GetMultiBinFromSparseFeatures() const { sum_sparse_rate); std::unique_ptr ret; ret.reset(MultiValBin::CreateMultiValBin(num_data_, offsets.back(), - num_feature, sum_sparse_rate)); + num_feature, sum_sparse_rate, offsets)); PushDataToMultiValBin(num_data_, most_freq_bins, offsets, &iters, ret.get()); ret->FinishLoad(); 
return ret.release(); } -MultiValBin* Dataset::GetMultiBinFromAllFeatures() const { +MultiValBin* Dataset::GetMultiBinFromAllFeatures(const std::vector& offsets) const { Common::FunctionTimer fun_time("Dataset::GetMultiBinFromAllFeatures", global_timer); int num_threads = OMP_NUM_THREADS(); @@ -536,20 +529,31 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const { std::unique_ptr ret; std::vector>> iters(num_threads); std::vector most_freq_bins; - std::vector offsets; - int num_total_bin = 1; - offsets.push_back(num_total_bin); + int ncol = 0; + for (int gid = 0; gid < num_groups_; ++gid) { + if (feature_groups_[gid]->is_multi_val_) { + ncol += feature_groups_[gid]->num_feature_; + } else { + ++ncol; + } + for (int fid = 0; fid < feature_groups_[gid]->num_feature_; ++fid) { + const auto& bin_mapper = feature_groups_[gid]->bin_mappers_[fid]; + sum_dense_ratio += 1.0f - bin_mapper->sparse_rate(); + } + } + sum_dense_ratio /= ncol; + const int offset = (1.0f - sum_dense_ratio) >= + MultiValBin::multi_val_bin_sparse_threshold ? 
1 : 0; + int num_total_bin = offset; for (int gid = 0; gid < num_groups_; ++gid) { if (feature_groups_[gid]->is_multi_val_) { for (int fid = 0; fid < feature_groups_[gid]->num_feature_; ++fid) { const auto& bin_mapper = feature_groups_[gid]->bin_mappers_[fid]; - sum_dense_ratio += 1.0f - bin_mapper->sparse_rate(); most_freq_bins.push_back(bin_mapper->GetMostFreqBin()); num_total_bin += bin_mapper->num_bin(); if (most_freq_bins.back() == 0) { - num_total_bin -= 1; + num_total_bin -= offset; } - offsets.push_back(num_total_bin); #pragma omp parallel for schedule(static, 1) for (int tid = 0; tid < num_threads; ++tid) { iters[tid].emplace_back( @@ -558,23 +562,18 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const { } } else { most_freq_bins.push_back(0); - num_total_bin += feature_groups_[gid]->bin_offsets_.back() - 1; + num_total_bin += feature_groups_[gid]->bin_offsets_.back() - offset; for (int tid = 0; tid < num_threads; ++tid) { iters[tid].emplace_back(feature_groups_[gid]->FeatureGroupIterator()); } - offsets.push_back(num_total_bin); - for (int fid = 0; fid < feature_groups_[gid]->num_feature_; ++fid) { - const auto& bin_mapper = feature_groups_[gid]->bin_mappers_[fid]; - sum_dense_ratio += 1.0f - bin_mapper->sparse_rate(); - } } } - sum_dense_ratio /= static_cast(most_freq_bins.size()); + CHECK(static_cast(most_freq_bins.size()) == ncol); Log::Debug("Dataset::GetMultiBinFromAllFeatures: sparse rate %f", 1.0 - sum_dense_ratio); ret.reset(MultiValBin::CreateMultiValBin( num_data_, num_total_bin, static_cast(most_freq_bins.size()), - 1.0 - sum_dense_ratio)); + 1.0 - sum_dense_ratio, offsets)); PushDataToMultiValBin(num_data_, most_freq_bins, offsets, &iters, ret.get()); ret->FinishLoad(); return ret.release(); @@ -583,74 +582,90 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures() const { TrainingShareStates* Dataset::GetShareStates( score_t* gradients, score_t* hessians, const std::vector& is_feature_used, bool is_constant_hessian, - bool force_colwise, 
bool force_rowwise) const { + bool force_col_wise, bool force_row_wise) const { Common::FunctionTimer fun_timer("Dataset::TestMultiThreadingMethod", global_timer); - if (force_colwise && force_rowwise) { + if (force_col_wise && force_row_wise) { Log::Fatal( - "Cannot set both `force_col_wise` and `force_row_wise` to `true` at " + "Cannot set both of `force_col_wise` and `force_row_wise` to `true` at " "the same time"); } if (num_groups_ <= 0) { TrainingShareStates* share_state = new TrainingShareStates(); - share_state->is_colwise = true; + share_state->is_col_wise = true; share_state->is_constant_hessian = is_constant_hessian; return share_state; } - if (force_colwise) { + if (force_col_wise) { TrainingShareStates* share_state = new TrainingShareStates(); - share_state->SetMultiValBin(GetMultiBinFromSparseFeatures()); - share_state->is_colwise = true; + std::vector offsets; + share_state->CalcBinOffsets( + feature_groups_, &offsets, true); + share_state->SetMultiValBin(GetMultiBinFromSparseFeatures(offsets), + num_data_, feature_groups_, false, true); + share_state->is_col_wise = true; share_state->is_constant_hessian = is_constant_hessian; return share_state; - } else if (force_rowwise) { + } else if (force_row_wise) { TrainingShareStates* share_state = new TrainingShareStates(); - share_state->SetMultiValBin(GetMultiBinFromAllFeatures()); - share_state->is_colwise = false; + std::vector offsets; + share_state->CalcBinOffsets( + feature_groups_, &offsets, false); + share_state->SetMultiValBin(GetMultiBinFromAllFeatures(offsets), num_data_, + feature_groups_, false, false); + share_state->is_col_wise = false; share_state->is_constant_hessian = is_constant_hessian; return share_state; } else { std::unique_ptr sparse_bin; std::unique_ptr all_bin; - std::unique_ptr colwise_state; - std::unique_ptr rowwise_state; - colwise_state.reset(new TrainingShareStates()); - rowwise_state.reset(new TrainingShareStates()); + std::unique_ptr col_wise_state; + std::unique_ptr 
row_wise_state; + col_wise_state.reset(new TrainingShareStates()); + row_wise_state.reset(new TrainingShareStates()); - std::chrono::duration col_wise_init_time, - row_wise_init_time; + std::chrono::duration col_wise_init_time, row_wise_init_time; auto start_time = std::chrono::steady_clock::now(); - colwise_state->SetMultiValBin(GetMultiBinFromSparseFeatures()); + std::vector col_wise_offsets; + col_wise_state->CalcBinOffsets(feature_groups_, &col_wise_offsets, true); + col_wise_state->SetMultiValBin(GetMultiBinFromSparseFeatures(col_wise_offsets), num_data_, + feature_groups_, false, true); col_wise_init_time = std::chrono::steady_clock::now() - start_time; start_time = std::chrono::steady_clock::now(); - rowwise_state->SetMultiValBin(GetMultiBinFromAllFeatures()); + std::vector row_wise_offsets; + row_wise_state->CalcBinOffsets(feature_groups_, &row_wise_offsets, false); + row_wise_state->SetMultiValBin(GetMultiBinFromAllFeatures(row_wise_offsets), num_data_, + feature_groups_, false, false); + row_wise_init_time = std::chrono::steady_clock::now() - start_time; + + uint64_t max_total_bin = std::max(row_wise_state->num_hist_total_bin(), + col_wise_state->num_hist_total_bin()); std::vector> - hist_data(NumTotalBin() * 2); + hist_data(max_total_bin * 2); - row_wise_init_time = std::chrono::steady_clock::now() - start_time; Log::Debug( - "init for col-wise cost %f seconds, init for row-wise cost %f seconds", - col_wise_init_time * 1e-3, row_wise_init_time * 1e-3); - colwise_state->is_colwise = true; - colwise_state->is_constant_hessian = is_constant_hessian; - InitTrain(is_feature_used, colwise_state.get()); - rowwise_state->is_colwise = false; - rowwise_state->is_constant_hessian = is_constant_hessian; - InitTrain(is_feature_used, rowwise_state.get()); + "init for col-wise cost %f seconds, init for row-wise cost %f seconds", + col_wise_init_time * 1e-3, row_wise_init_time * 1e-3); + + col_wise_state->is_col_wise = true; + col_wise_state->is_constant_hessian = 
is_constant_hessian; + InitTrain(is_feature_used, col_wise_state.get()); + row_wise_state->is_col_wise = false; + row_wise_state->is_constant_hessian = is_constant_hessian; + InitTrain(is_feature_used, row_wise_state.get()); std::chrono::duration col_wise_time, row_wise_time; start_time = std::chrono::steady_clock::now(); ConstructHistograms(is_feature_used, nullptr, num_data_, gradients, - hessians, gradients, hessians, colwise_state.get(), + hessians, gradients, hessians, col_wise_state.get(), hist_data.data()); col_wise_time = std::chrono::steady_clock::now() - start_time; start_time = std::chrono::steady_clock::now(); ConstructHistograms(is_feature_used, nullptr, num_data_, gradients, - hessians, gradients, hessians, rowwise_state.get(), + hessians, gradients, hessians, row_wise_state.get(), hist_data.data()); row_wise_time = std::chrono::steady_clock::now() - start_time; - Log::Debug("col-wise cost %f seconds, row-wise cost %f seconds", - col_wise_time * 1e-3, row_wise_time * 1e-3); + if (col_wise_time < row_wise_time) { auto overhead_cost = row_wise_init_time + row_wise_time + col_wise_time; Log::Warning( @@ -658,7 +673,7 @@ TrainingShareStates* Dataset::GetShareStates( "%f seconds.\n" "You can set `force_col_wise=true` to remove the overhead.", overhead_cost * 1e-3); - return colwise_state.release(); + return col_wise_state.release(); } else { auto overhead_cost = col_wise_init_time + row_wise_time + col_wise_time; Log::Warning( @@ -667,12 +682,12 @@ TrainingShareStates* Dataset::GetShareStates( "You can set `force_row_wise=true` to remove the overhead.\n" "And if memory is not enough, you can set `force_col_wise=true`.", overhead_cost * 1e-3); - if (rowwise_state->multi_val_bin->IsSparse()) { + if (row_wise_state->IsSparseRowwise()) { Log::Debug("Using Sparse Multi-Val Bin"); } else { Log::Debug("Using Dense Multi-Val Bin"); } - return rowwise_state.release(); + return row_wise_state.release(); } } } @@ -1041,152 +1056,9 @@ void Dataset::DumpTextFile(const 
char* text_filename) { void Dataset::InitTrain(const std::vector& is_feature_used, TrainingShareStates* share_state) const { Common::FunctionTimer fun_time("Dataset::InitTrain", global_timer); - share_state->is_use_subcol = false; - if (share_state->multi_val_bin == nullptr) { - return; - } - const auto multi_val_bin = share_state->multi_val_bin.get(); - double sum_used_dense_ratio = 0.0; - double sum_dense_ratio = 0.0; - int num_used = 0; - int total = 0; - std::vector used_feature_index; - for (int i = 0; i < num_groups_; ++i) { - int f_start = group_feature_start_[i]; - if (feature_groups_[i]->is_multi_val_) { - for (int j = 0; j < feature_groups_[i]->num_feature_; ++j) { - const auto dense_rate = - 1.0 - feature_groups_[i]->bin_mappers_[j]->sparse_rate(); - if (is_feature_used[f_start + j]) { - ++num_used; - used_feature_index.push_back(total); - sum_used_dense_ratio += dense_rate; - } - sum_dense_ratio += dense_rate; - ++total; - } - } else if (!share_state->is_colwise) { - bool is_group_used = false; - double dense_rate = 0; - for (int j = 0; j < feature_groups_[i]->num_feature_; ++j) { - if (is_feature_used[f_start + j]) { - is_group_used = true; - } - dense_rate += 1.0 - feature_groups_[i]->bin_mappers_[j]->sparse_rate(); - } - if (is_group_used) { - ++num_used; - used_feature_index.push_back(total); - sum_used_dense_ratio += dense_rate; - } - sum_dense_ratio += dense_rate; - ++total; - } - } - const double k_subfeature_threshold = 0.6; - if (sum_used_dense_ratio >= sum_dense_ratio * k_subfeature_threshold) { - // only need to copy subset - if (share_state->is_use_subrow && !share_state->is_subrow_copied) { - if (share_state->multi_val_bin_subset == nullptr) { - share_state->multi_val_bin_subset.reset(multi_val_bin->CreateLike( - share_state->bagging_indices_cnt, multi_val_bin->num_bin(), total, - multi_val_bin->num_element_per_row())); - } else { - share_state->multi_val_bin_subset->ReSize( - share_state->bagging_indices_cnt, multi_val_bin->num_bin(), 
total, - multi_val_bin->num_element_per_row()); - } - share_state->multi_val_bin_subset->CopySubrow( - multi_val_bin, share_state->bagging_use_indices, - share_state->bagging_indices_cnt); - // avoid to copy subset many times - share_state->is_subrow_copied = true; - } - } else { - share_state->is_use_subcol = true; - std::vector upper_bound; - std::vector lower_bound; - std::vector delta; - share_state->hist_move_src.clear(); - share_state->hist_move_dest.clear(); - share_state->hist_move_size.clear(); - - int num_total_bin = 1; - int new_num_total_bin = 1; - - for (int i = 0; i < num_groups_; ++i) { - int f_start = group_feature_start_[i]; - if (feature_groups_[i]->is_multi_val_) { - for (int j = 0; j < feature_groups_[i]->num_feature_; ++j) { - const auto& bin_mapper = feature_groups_[i]->bin_mappers_[j]; - int cur_num_bin = bin_mapper->num_bin(); - if (bin_mapper->GetMostFreqBin() == 0) { - cur_num_bin -= 1; - } - num_total_bin += cur_num_bin; - if (is_feature_used[f_start + j]) { - new_num_total_bin += cur_num_bin; - - lower_bound.push_back(num_total_bin - cur_num_bin); - upper_bound.push_back(num_total_bin); - - share_state->hist_move_src.push_back( - (new_num_total_bin - cur_num_bin) * 2); - share_state->hist_move_dest.push_back((num_total_bin - cur_num_bin) * - 2); - share_state->hist_move_size.push_back(cur_num_bin * 2); - delta.push_back(num_total_bin - new_num_total_bin); - } - } - } else if (!share_state->is_colwise) { - bool is_group_used = false; - for (int j = 0; j < feature_groups_[i]->num_feature_; ++j) { - if (is_feature_used[f_start + j]) { - is_group_used = true; - break; - } - } - int cur_num_bin = feature_groups_[i]->bin_offsets_.back() - 1; - num_total_bin += cur_num_bin; - if (is_group_used) { - new_num_total_bin += cur_num_bin; - - lower_bound.push_back(num_total_bin - cur_num_bin); - upper_bound.push_back(num_total_bin); - - share_state->hist_move_src.push_back( - (new_num_total_bin - cur_num_bin) * 2); - 
share_state->hist_move_dest.push_back((num_total_bin - cur_num_bin) * - 2); - share_state->hist_move_size.push_back(cur_num_bin * 2); - delta.push_back(num_total_bin - new_num_total_bin); - } - } - } - // avoid out of range - lower_bound.push_back(num_total_bin); - upper_bound.push_back(num_total_bin); - data_size_t num_data = - share_state->is_use_subrow ? share_state->bagging_indices_cnt : num_data_; - if (share_state->multi_val_bin_subset == nullptr) { - share_state->multi_val_bin_subset.reset(multi_val_bin->CreateLike( - num_data, new_num_total_bin, num_used, sum_used_dense_ratio)); - } else { - share_state->multi_val_bin_subset->ReSize(num_data, new_num_total_bin, - num_used, sum_used_dense_ratio); - } - if (share_state->is_use_subrow) { - share_state->multi_val_bin_subset->CopySubrowAndSubcol( - multi_val_bin, share_state->bagging_use_indices, - share_state->bagging_indices_cnt, used_feature_index, lower_bound, - upper_bound, delta); - // may need to recopy subset - share_state->is_subrow_copied = false; - } else { - share_state->multi_val_bin_subset->CopySubcol( - multi_val_bin, used_feature_index, lower_bound, upper_bound, delta); - } - } + share_state->InitTrain(group_feature_start_, + feature_groups_, + is_feature_used); } template @@ -1198,80 +1070,8 @@ void Dataset::ConstructHistogramsMultiVal(const data_size_t* data_indices, hist_t* hist_data) const { Common::FunctionTimer fun_time("Dataset::ConstructHistogramsMultiVal", global_timer); - const auto multi_val_bin = - (share_state->is_use_subcol || share_state->is_use_subrow) - ? 
share_state->multi_val_bin_subset.get() - : share_state->multi_val_bin.get(); - if (multi_val_bin == nullptr) { - return; - } - global_timer.Start("Dataset::sparse_bin_histogram"); - const int num_bin = multi_val_bin->num_bin(); - const int num_bin_aligned = - (num_bin + kAlignedSize - 1) / kAlignedSize * kAlignedSize; - int n_data_block = 1; - int data_block_size = num_data; - Threading::BlockInfo(share_state->num_threads, num_data, 1024, - &n_data_block, &data_block_size); - const size_t buf_size = - static_cast(n_data_block - 1) * num_bin_aligned * 2; - if (share_state->hist_buf.size() < buf_size) { - share_state->hist_buf.resize(buf_size); - } - auto origin_hist_data = hist_data; - if (share_state->is_use_subcol) { - hist_data = share_state->TempBuf(); - } - OMP_INIT_EX(); -#pragma omp parallel for schedule(static, 1) num_threads(share_state->num_threads) - for (int tid = 0; tid < n_data_block; ++tid) { - OMP_LOOP_EX_BEGIN(); - data_size_t start = tid * data_block_size; - data_size_t end = std::min(start + data_block_size, num_data); - auto data_ptr = hist_data; - if (tid > 0) { - data_ptr = share_state->hist_buf.data() + - static_cast(num_bin_aligned) * 2 * (tid - 1); - } - std::memset(reinterpret_cast(data_ptr), 0, num_bin * kHistEntrySize); - if (USE_INDICES) { - if (ORDERED) { - multi_val_bin->ConstructHistogramOrdered(data_indices, start, end, - gradients, hessians, data_ptr); - } else { - multi_val_bin->ConstructHistogram(data_indices, start, end, gradients, - hessians, data_ptr); - } - } else { - multi_val_bin->ConstructHistogram(start, end, gradients, hessians, - data_ptr); - } - OMP_LOOP_EX_END(); - } - OMP_THROW_EX(); - global_timer.Stop("Dataset::sparse_bin_histogram"); - - global_timer.Start("Dataset::sparse_bin_histogram_merge"); - int n_bin_block = 1; - int bin_block_size = num_bin; - Threading::BlockInfo(share_state->num_threads, num_bin, 512, &n_bin_block, - &bin_block_size); -#pragma omp parallel for schedule(static, 1) 
num_threads(share_state->num_threads) - for (int t = 0; t < n_bin_block; ++t) { - const int start = t * bin_block_size; - const int end = std::min(start + bin_block_size, num_bin); - for (int tid = 1; tid < n_data_block; ++tid) { - auto src_ptr = share_state->hist_buf.data() + - static_cast(num_bin_aligned) * 2 * (tid - 1); - for (int i = start * 2; i < end * 2; ++i) { - hist_data[i] += src_ptr[i]; - } - } - } - global_timer.Stop("Dataset::sparse_bin_histogram_merge"); - global_timer.Start("Dataset::sparse_bin_histogram_move"); - share_state->HistMove(hist_data, origin_hist_data); - global_timer.Stop("Dataset::sparse_bin_histogram_move"); + share_state->ConstructHistograms( + data_indices, num_data, gradients, hessians, hist_data); } template @@ -1280,7 +1080,7 @@ void Dataset::ConstructHistogramsInner( data_size_t num_data, const score_t* gradients, const score_t* hessians, score_t* ordered_gradients, score_t* ordered_hessians, TrainingShareStates* share_state, hist_t* hist_data) const { - if (!share_state->is_colwise) { + if (!share_state->is_col_wise) { return ConstructHistogramsMultiVal( data_indices, num_data, gradients, hessians, share_state, hist_data); } diff --git a/src/io/multi_val_dense_bin.hpp b/src/io/multi_val_dense_bin.hpp index 7645530d774b..9559e38b7f72 100644 --- a/src/io/multi_val_dense_bin.hpp +++ b/src/io/multi_val_dense_bin.hpp @@ -18,8 +18,10 @@ namespace LightGBM { template class MultiValDenseBin : public MultiValBin { public: - explicit MultiValDenseBin(data_size_t num_data, int num_bin, int num_feature) - : num_data_(num_data), num_bin_(num_bin), num_feature_(num_feature) { + explicit MultiValDenseBin(data_size_t num_data, int num_bin, int num_feature, + const std::vector& offsets) + : num_data_(num_data), num_bin_(num_bin), num_feature_(num_feature), + offsets_(offsets) { data_.resize(static_cast(num_data_) * num_feature_, static_cast(0)); } @@ -36,6 +38,8 @@ class MultiValDenseBin : public MultiValBin { double num_element_per_row() const 
override { return num_feature_; } + const std::vector& offsets() const override { return offsets_; } + void PushOneRow(int , data_size_t idx, const std::vector& values) override { auto start = RowPtr(idx); for (auto i = 0; i < num_feature_; ++i) { @@ -50,13 +54,13 @@ class MultiValDenseBin : public MultiValBin { return false; } - template void ConstructHistogramInner(const data_size_t* data_indices, data_size_t start, data_size_t end, const score_t* gradients, const score_t* hessians, hist_t* out) const { data_size_t i = start; hist_t* grad = out; hist_t* hess = out + 1; + if (USE_PREFETCH) { const data_size_t pf_offset = 32 / sizeof(VAL_T); const data_size_t pf_end = end - pf_offset; @@ -70,30 +74,28 @@ class MultiValDenseBin : public MultiValBin { } PREFETCH_T0(data_.data() + RowPtr(pf_idx)); const auto j_start = RowPtr(idx); - for (auto j = j_start; j < j_start + num_feature_; ++j) { - const auto ti = static_cast(data_[j]) << 1; - if (ORDERED) { - grad[ti] += gradients[i]; - hess[ti] += hessians[i]; - } else { - grad[ti] += gradients[idx]; - hess[ti] += hessians[idx]; - } + const VAL_T* data_ptr = data_.data() + j_start; + const score_t gradient = ORDERED ? gradients[i] : gradients[idx]; + const score_t hessian = ORDERED ? hessians[i] : hessians[idx]; + for (int j = 0; j < num_feature_; ++j) { + const uint32_t bin = static_cast(data_ptr[j]); + const auto ti = (bin + offsets_[j]) << 1; + grad[ti] += gradient; + hess[ti] += hessian; } } } for (; i < end; ++i) { const auto idx = USE_INDICES ? data_indices[i] : i; const auto j_start = RowPtr(idx); - for (auto j = j_start; j < j_start + num_feature_; ++j) { - const auto ti = static_cast(data_[j]) << 1; - if (ORDERED) { - grad[ti] += gradients[i]; - hess[ti] += hessians[i]; - } else { - grad[ti] += gradients[idx]; - hess[ti] += hessians[idx]; - } + const VAL_T* data_ptr = data_.data() + j_start; + const score_t gradient = ORDERED ? gradients[i] : gradients[idx]; + const score_t hessian = ORDERED ? 
hessians[i] : hessians[idx]; + for (int j = 0; j < num_feature_; ++j) { + const uint32_t bin = static_cast(data_ptr[j]); + const auto ti = (bin + offsets_[j]) << 1; + grad[ti] += gradient; + hess[ti] += hessian; } } } @@ -121,15 +123,17 @@ class MultiValDenseBin : public MultiValBin { gradients, hessians, out); } - MultiValBin* CreateLike(data_size_t num_data, int num_bin, int num_feature, double) const override { - return new MultiValDenseBin(num_data, num_bin, num_feature); + MultiValBin* CreateLike(data_size_t num_data, int num_bin, int num_feature, double, + const std::vector& offsets) const override { + return new MultiValDenseBin(num_data, num_bin, num_feature, offsets); } void ReSize(data_size_t num_data, int num_bin, int num_feature, - double) override { + double, const std::vector& offsets) override { num_data_ = num_data; num_bin_ = num_bin; num_feature_ = num_feature; + offsets_ = offsets; size_t new_size = static_cast(num_feature_) * num_data_; if (data_.size() < new_size) { data_.resize(new_size, 0); @@ -139,8 +143,7 @@ class MultiValDenseBin : public MultiValBin { template void CopyInner(const MultiValBin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices, - const std::vector& used_feature_index, - const std::vector& delta) { + const std::vector& used_feature_index) { const auto other_bin = reinterpret_cast*>(full_bin); if (SUBROW) { @@ -162,8 +165,7 @@ class MultiValDenseBin : public MultiValBin { if (SUBCOL) { if (other_bin->data_[other_j_start + used_feature_index[j]] > 0) { data_[j_start + j] = static_cast( - other_bin->data_[other_j_start + used_feature_index[j]] - - delta[j]); + other_bin->data_[other_j_start + used_feature_index[j]]); } else { data_[j_start + j] = 0; } @@ -180,16 +182,15 @@ class MultiValDenseBin : public MultiValBin { void CopySubrow(const MultiValBin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) override { CopyInner(full_bin, used_indices, num_used_indices, - std::vector(), 
std::vector()); + std::vector()); } void CopySubcol(const MultiValBin* full_bin, const std::vector& used_feature_index, const std::vector&, const std::vector&, - const std::vector& delta) override { - CopyInner(full_bin, nullptr, num_data_, used_feature_index, - delta); + const std::vector&) override { + CopyInner(full_bin, nullptr, num_data_, used_feature_index); } void CopySubrowAndSubcol(const MultiValBin* full_bin, @@ -198,9 +199,9 @@ class MultiValDenseBin : public MultiValBin { const std::vector& used_feature_index, const std::vector&, const std::vector&, - const std::vector& delta) override { + const std::vector&) override { CopyInner(full_bin, used_indices, num_used_indices, - used_feature_index, delta); + used_feature_index); } inline size_t RowPtr(data_size_t idx) const { @@ -213,10 +214,12 @@ class MultiValDenseBin : public MultiValBin { data_size_t num_data_; int num_bin_; int num_feature_; + std::vector offsets_; std::vector> data_; MultiValDenseBin(const MultiValDenseBin& other) - : num_data_(other.num_data_), num_bin_(other.num_bin_), num_feature_(other.num_feature_), data_(other.data_) { + : num_data_(other.num_data_), num_bin_(other.num_bin_), num_feature_(other.num_feature_), + offsets_(other.offsets_), data_(other.data_) { } }; diff --git a/src/io/multi_val_sparse_bin.hpp b/src/io/multi_val_sparse_bin.hpp index ec3f64a11a02..1699380732c6 100644 --- a/src/io/multi_val_sparse_bin.hpp +++ b/src/io/multi_val_sparse_bin.hpp @@ -46,6 +46,8 @@ class MultiValSparseBin : public MultiValBin { return estimate_element_per_row_; } + const std::vector& offsets() const override { return offsets_; } + void PushOneRow(int tid, data_size_t idx, const std::vector& values) override { const int pre_alloc_size = 50; @@ -114,6 +116,7 @@ class MultiValSparseBin : public MultiValBin { data_size_t i = start; hist_t* grad = out; hist_t* hess = out + 1; + const VAL_T* data_ptr = data_.data(); if (USE_PREFETCH) { const data_size_t pf_offset = 32 / sizeof(VAL_T); const 
data_size_t pf_end = end - pf_offset; @@ -127,18 +130,15 @@ class MultiValSparseBin : public MultiValBin { PREFETCH_T0(hessians + pf_idx); } PREFETCH_T0(row_ptr_.data() + pf_idx); - PREFETCH_T0(data_.data() + row_ptr_[pf_idx]); + PREFETCH_T0(data_ptr + row_ptr_[pf_idx]); const auto j_start = RowPtr(idx); const auto j_end = RowPtr(idx + 1); + const score_t gradient = ORDERED ? gradients[i] : gradients[idx]; + const score_t hessian = ORDERED ? hessians[i] : hessians[idx]; for (auto j = j_start; j < j_end; ++j) { - const auto ti = static_cast(data_[j]) << 1; - if (ORDERED) { - grad[ti] += gradients[i]; - hess[ti] += hessians[i]; - } else { - grad[ti] += gradients[idx]; - hess[ti] += hessians[idx]; - } + const auto ti = static_cast(data_ptr[j]) << 1; + grad[ti] += gradient; + hess[ti] += hessian; } } } @@ -146,15 +146,12 @@ class MultiValSparseBin : public MultiValBin { const auto idx = USE_INDICES ? data_indices[i] : i; const auto j_start = RowPtr(idx); const auto j_end = RowPtr(idx + 1); + const score_t gradient = ORDERED ? gradients[i] : gradients[idx]; + const score_t hessian = ORDERED ? 
hessians[i] : hessians[idx]; for (auto j = j_start; j < j_end; ++j) { - const auto ti = static_cast(data_[j]) << 1; - if (ORDERED) { - grad[ti] += gradients[i]; - hess[ti] += hessians[i]; - } else { - grad[ti] += gradients[idx]; - hess[ti] += hessians[idx]; - } + const auto ti = static_cast(data_ptr[j]) << 1; + grad[ti] += gradient; + hess[ti] += hessian; } } } @@ -183,13 +180,14 @@ class MultiValSparseBin : public MultiValBin { } MultiValBin* CreateLike(data_size_t num_data, int num_bin, int, - double estimate_element_per_row) const override { + double estimate_element_per_row, + const std::vector& /*offsets*/) const override { return new MultiValSparseBin(num_data, num_bin, estimate_element_per_row); } void ReSize(data_size_t num_data, int num_bin, int, - double estimate_element_per_row) override { + double estimate_element_per_row, const std::vector& /*offsets*/) override { num_data_ = num_data; num_bin_ = num_bin; estimate_element_per_row_ = estimate_element_per_row; @@ -302,6 +300,7 @@ class MultiValSparseBin : public MultiValBin { std::vector>> t_data_; std::vector t_size_; + std::vector offsets_; MultiValSparseBin( const MultiValSparseBin& other) diff --git a/src/io/train_share_states.cpp b/src/io/train_share_states.cpp new file mode 100644 index 000000000000..7762bac52685 --- /dev/null +++ b/src/io/train_share_states.cpp @@ -0,0 +1,407 @@ +/*! + * Copyright (c) 2016 Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See LICENSE file in the project root for + * license information. 
+ */ + +#include + +namespace LightGBM { + +MultiValBinWrapper::MultiValBinWrapper(MultiValBin* bin, data_size_t num_data, + const std::vector& feature_groups_contained): + feature_groups_contained_(feature_groups_contained) { + num_threads_ = OMP_NUM_THREADS(); + max_block_size_ = num_data; + num_data_ = num_data; + multi_val_bin_.reset(bin); + if (bin == nullptr) { + return; + } + num_bin_ = bin->num_bin(); + num_bin_aligned_ = (num_bin_ + kAlignedSize - 1) / kAlignedSize * kAlignedSize; +} + +void MultiValBinWrapper::InitTrain(const std::vector& group_feature_start, + const std::vector>& feature_groups, + const std::vector& is_feature_used, + const data_size_t* bagging_use_indices, + data_size_t bagging_indices_cnt) { + is_use_subcol_ = false; + if (multi_val_bin_ == nullptr) { + return; + } + CopyMultiValBinSubset(group_feature_start, feature_groups, + is_feature_used, bagging_use_indices, bagging_indices_cnt); + const auto cur_multi_val_bin = (is_use_subcol_ || is_use_subrow_) + ? multi_val_bin_subset_.get() + : multi_val_bin_.get(); + if (cur_multi_val_bin != nullptr) { + num_bin_ = cur_multi_val_bin->num_bin(); + num_bin_aligned_ = (num_bin_ + kAlignedSize - 1) / kAlignedSize * kAlignedSize; + min_block_size_ = std::min(static_cast(0.3f * num_bin_ / + cur_multi_val_bin->num_element_per_row()) + 1, 1024); + } +} + +void MultiValBinWrapper::HistMove(const std::vector>& hist_buf) { + if (!is_use_subcol_) { + return; + } + const hist_t* src = hist_buf.data() + hist_buf.size() - + 2 * static_cast(num_bin_aligned_); + #pragma omp parallel for schedule(static) + for (int i = 0; i < static_cast(hist_move_src_.size()); ++i) { + std::copy_n(src + hist_move_src_[i], hist_move_size_[i], + origin_hist_data_ + hist_move_dest_[i]); + } +} + +void MultiValBinWrapper::HistMerge(std::vector>* hist_buf) { + int n_bin_block = 1; + int bin_block_size = num_bin_; + Threading::BlockInfo(num_threads_, num_bin_, 512, &n_bin_block, + &bin_block_size); + hist_t* dst = 
origin_hist_data_; + if (is_use_subcol_) { + dst = hist_buf->data() + hist_buf->size() - 2 * static_cast(num_bin_aligned_); + } + #pragma omp parallel for schedule(static, 1) num_threads(num_threads_) + for (int t = 0; t < n_bin_block; ++t) { + const int start = t * bin_block_size; + const int end = std::min(start + bin_block_size, num_bin_); + for (int tid = 1; tid < n_data_block_; ++tid) { + auto src_ptr = hist_buf->data() + static_cast(num_bin_aligned_) * 2 * (tid - 1); + for (int i = start * 2; i < end * 2; ++i) { + dst[i] += src_ptr[i]; + } + } + } +} + +void MultiValBinWrapper::ResizeHistBuf(std::vector>* hist_buf, + MultiValBin* sub_multi_val_bin, + hist_t* origin_hist_data) { + num_bin_ = sub_multi_val_bin->num_bin(); + num_bin_aligned_ = (num_bin_ + kAlignedSize - 1) / kAlignedSize * kAlignedSize; + origin_hist_data_ = origin_hist_data; + size_t new_buf_size = static_cast(n_data_block_) * static_cast(num_bin_aligned_) * 2; + if (hist_buf->size() < new_buf_size) { + hist_buf->resize(new_buf_size); + } +} + +void MultiValBinWrapper::CopyMultiValBinSubset( + const std::vector& group_feature_start, + const std::vector>& feature_groups, + const std::vector& is_feature_used, + const data_size_t* bagging_use_indices, + data_size_t bagging_indices_cnt) { + double sum_used_dense_ratio = 0.0; + double sum_dense_ratio = 0.0; + int num_used = 0; + int total = 0; + std::vector used_feature_index; + for (int i : feature_groups_contained_) { + int f_start = group_feature_start[i]; + if (feature_groups[i]->is_multi_val_) { + for (int j = 0; j < feature_groups[i]->num_feature_; ++j) { + const auto dense_rate = + 1.0 - feature_groups[i]->bin_mappers_[j]->sparse_rate(); + if (is_feature_used[f_start + j]) { + ++num_used; + used_feature_index.push_back(total); + sum_used_dense_ratio += dense_rate; + } + sum_dense_ratio += dense_rate; + ++total; + } + } else { + bool is_group_used = false; + double dense_rate = 0; + for (int j = 0; j < feature_groups[i]->num_feature_; ++j) { + 
if (is_feature_used[f_start + j]) { + is_group_used = true; + } + dense_rate += 1.0 - feature_groups[i]->bin_mappers_[j]->sparse_rate(); + } + if (is_group_used) { + ++num_used; + used_feature_index.push_back(total); + sum_used_dense_ratio += dense_rate; + } + sum_dense_ratio += dense_rate; + ++total; + } + } + const double k_subfeature_threshold = 0.6; + if (sum_used_dense_ratio >= sum_dense_ratio * k_subfeature_threshold) { + // only need to copy subset + if (is_use_subrow_ && !is_subrow_copied_) { + if (multi_val_bin_subset_ == nullptr) { + multi_val_bin_subset_.reset(multi_val_bin_->CreateLike( + bagging_indices_cnt, multi_val_bin_->num_bin(), total, + multi_val_bin_->num_element_per_row(), multi_val_bin_->offsets())); + } else { + multi_val_bin_subset_->ReSize( + bagging_indices_cnt, multi_val_bin_->num_bin(), total, + multi_val_bin_->num_element_per_row(), multi_val_bin_->offsets()); + } + multi_val_bin_subset_->CopySubrow( + multi_val_bin_.get(), bagging_use_indices, + bagging_indices_cnt); + // avoid to copy subset many times + is_subrow_copied_ = true; + } + } else { + is_use_subcol_ = true; + std::vector upper_bound; + std::vector lower_bound; + std::vector delta; + std::vector offsets; + hist_move_src_.clear(); + hist_move_dest_.clear(); + hist_move_size_.clear(); + + const int offset = multi_val_bin_->IsSparse() ? 
1 : 0; + int num_total_bin = offset; + int new_num_total_bin = offset; + offsets.push_back(static_cast(new_num_total_bin)); + for (int i : feature_groups_contained_) { + int f_start = group_feature_start[i]; + if (feature_groups[i]->is_multi_val_) { + for (int j = 0; j < feature_groups[i]->num_feature_; ++j) { + const auto& bin_mapper = feature_groups[i]->bin_mappers_[j]; + int cur_num_bin = bin_mapper->num_bin(); + if (bin_mapper->GetMostFreqBin() == 0) { + cur_num_bin -= offset; + } + num_total_bin += cur_num_bin; + if (is_feature_used[f_start + j]) { + new_num_total_bin += cur_num_bin; + offsets.push_back(static_cast(new_num_total_bin)); + lower_bound.push_back(num_total_bin - cur_num_bin); + upper_bound.push_back(num_total_bin); + + hist_move_src_.push_back( + (new_num_total_bin - cur_num_bin) * 2); + hist_move_dest_.push_back((num_total_bin - cur_num_bin) * + 2); + hist_move_size_.push_back(cur_num_bin * 2); + delta.push_back(num_total_bin - new_num_total_bin); + } + } + } else { + bool is_group_used = false; + for (int j = 0; j < feature_groups[i]->num_feature_; ++j) { + if (is_feature_used[f_start + j]) { + is_group_used = true; + break; + } + } + int cur_num_bin = feature_groups[i]->bin_offsets_.back() - offset; + num_total_bin += cur_num_bin; + if (is_group_used) { + new_num_total_bin += cur_num_bin; + offsets.push_back(static_cast(new_num_total_bin)); + lower_bound.push_back(num_total_bin - cur_num_bin); + upper_bound.push_back(num_total_bin); + + hist_move_src_.push_back( + (new_num_total_bin - cur_num_bin) * 2); + hist_move_dest_.push_back((num_total_bin - cur_num_bin) * + 2); + hist_move_size_.push_back(cur_num_bin * 2); + delta.push_back(num_total_bin - new_num_total_bin); + } + } + } + // avoid out of range + lower_bound.push_back(num_total_bin); + upper_bound.push_back(num_total_bin); + data_size_t num_data = is_use_subrow_ ? 
bagging_indices_cnt : num_data_; + if (multi_val_bin_subset_ == nullptr) { + multi_val_bin_subset_.reset(multi_val_bin_->CreateLike( + num_data, new_num_total_bin, num_used, sum_used_dense_ratio, offsets)); + } else { + multi_val_bin_subset_->ReSize(num_data, new_num_total_bin, + num_used, sum_used_dense_ratio, offsets); + } + if (is_use_subrow_) { + multi_val_bin_subset_->CopySubrowAndSubcol( + multi_val_bin_.get(), bagging_use_indices, + bagging_indices_cnt, used_feature_index, lower_bound, + upper_bound, delta); + // may need to recopy subset + is_subrow_copied_ = false; + } else { + multi_val_bin_subset_->CopySubcol( + multi_val_bin_.get(), used_feature_index, lower_bound, upper_bound, delta); + } + } +} + +void TrainingShareStates::CalcBinOffsets(const std::vector>& feature_groups, + std::vector* offsets, bool is_col_wise) { + offsets->clear(); + feature_hist_offsets_.clear(); + if (is_col_wise) { + uint32_t cur_num_bin = 0; + uint32_t hist_cur_num_bin = 0; + for (int group = 0; group < static_cast(feature_groups.size()); ++group) { + const std::unique_ptr& feature_group = feature_groups[group]; + if (feature_group->is_multi_val_) { + if (feature_group->is_dense_multi_val_) { + for (int i = 0; i < feature_group->num_feature_; ++i) { + const std::unique_ptr& bin_mapper = feature_group->bin_mappers_[i]; + if (group == 0 && i == 0 && bin_mapper->GetMostFreqBin() > 0) { + cur_num_bin += 1; + hist_cur_num_bin += 1; + } + offsets->push_back(cur_num_bin); + feature_hist_offsets_.push_back(hist_cur_num_bin); + int num_bin = bin_mapper->num_bin(); + hist_cur_num_bin += num_bin; + if (bin_mapper->GetMostFreqBin() == 0) { + feature_hist_offsets_.back() += 1; + } + cur_num_bin += num_bin; + } + offsets->push_back(cur_num_bin); + CHECK(cur_num_bin == feature_group->bin_offsets_.back()); + } else { + cur_num_bin += 1; + hist_cur_num_bin += 1; + for (int i = 0; i < feature_group->num_feature_; ++i) { + offsets->push_back(cur_num_bin); + 
feature_hist_offsets_.push_back(hist_cur_num_bin); + const std::unique_ptr& bin_mapper = feature_group->bin_mappers_[i]; + int num_bin = bin_mapper->num_bin(); + if (bin_mapper->GetMostFreqBin() == 0) { + num_bin -= 1; + } + hist_cur_num_bin += num_bin; + cur_num_bin += num_bin; + } + offsets->push_back(cur_num_bin); + CHECK(cur_num_bin == feature_group->bin_offsets_.back()); + } + } else { + for (int i = 0; i < feature_group->num_feature_; ++i) { + feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i]); + } + hist_cur_num_bin += feature_group->bin_offsets_.back(); + } + } + feature_hist_offsets_.push_back(hist_cur_num_bin); + num_hist_total_bin_ = static_cast(feature_hist_offsets_.back()); + } else { + double sum_dense_ratio = 0.0f; + int ncol = 0; + for (int gid = 0; gid < static_cast(feature_groups.size()); ++gid) { + if (feature_groups[gid]->is_multi_val_) { + ncol += feature_groups[gid]->num_feature_; + } else { + ++ncol; + } + for (int fid = 0; fid < feature_groups[gid]->num_feature_; ++fid) { + const auto& bin_mapper = feature_groups[gid]->bin_mappers_[fid]; + sum_dense_ratio += 1.0f - bin_mapper->sparse_rate(); + } + } + sum_dense_ratio /= ncol; + const bool is_sparse_row_wise = (1.0f - sum_dense_ratio) >= + MultiValBin::multi_val_bin_sparse_threshold ? 
1 : 0; + if (is_sparse_row_wise) { + int cur_num_bin = 1; + uint32_t hist_cur_num_bin = 1; + for (int group = 0; group < static_cast(feature_groups.size()); ++group) { + const std::unique_ptr& feature_group = feature_groups[group]; + if (feature_group->is_multi_val_) { + for (int i = 0; i < feature_group->num_feature_; ++i) { + offsets->push_back(cur_num_bin); + feature_hist_offsets_.push_back(hist_cur_num_bin); + const std::unique_ptr& bin_mapper = feature_group->bin_mappers_[i]; + int num_bin = bin_mapper->num_bin(); + if (bin_mapper->GetMostFreqBin() == 0) { + num_bin -= 1; + } + cur_num_bin += num_bin; + hist_cur_num_bin += num_bin; + } + } else { + offsets->push_back(cur_num_bin); + cur_num_bin += feature_group->bin_offsets_.back() - 1; + for (int i = 0; i < feature_group->num_feature_; ++i) { + feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i] - 1); + } + hist_cur_num_bin += feature_group->bin_offsets_.back() - 1; + } + } + offsets->push_back(cur_num_bin); + feature_hist_offsets_.push_back(hist_cur_num_bin); + } else { + int cur_num_bin = 0; + uint32_t hist_cur_num_bin = 0; + for (int group = 0; group < static_cast(feature_groups.size()); ++group) { + const std::unique_ptr& feature_group = feature_groups[group]; + if (feature_group->is_multi_val_) { + for (int i = 0; i < feature_group->num_feature_; ++i) { + const std::unique_ptr& bin_mapper = feature_group->bin_mappers_[i]; + if (group == 0 && i == 0 && bin_mapper->GetMostFreqBin() > 0) { + cur_num_bin += 1; + hist_cur_num_bin += 1; + } + offsets->push_back(cur_num_bin); + feature_hist_offsets_.push_back(hist_cur_num_bin); + int num_bin = bin_mapper->num_bin(); + cur_num_bin += num_bin; + hist_cur_num_bin += num_bin; + if (bin_mapper->GetMostFreqBin() == 0) { + feature_hist_offsets_.back() += 1; + } + } + } else { + offsets->push_back(cur_num_bin); + cur_num_bin += feature_group->bin_offsets_.back(); + for (int i = 0; i < feature_group->num_feature_; ++i) { + 
feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i]); + } + hist_cur_num_bin += feature_group->bin_offsets_.back(); + } + } + offsets->push_back(cur_num_bin); + feature_hist_offsets_.push_back(hist_cur_num_bin); + } + num_hist_total_bin_ = static_cast(feature_hist_offsets_.back()); + } +} + +void TrainingShareStates::SetMultiValBin(MultiValBin* bin, data_size_t num_data, + const std::vector>& feature_groups, + bool dense_only, bool sparse_only) { + num_threads = OMP_NUM_THREADS(); + if (bin == nullptr) { + return; + } + std::vector feature_groups_contained; + for (int group = 0; group < static_cast(feature_groups.size()); ++group) { + const auto& feature_group = feature_groups[group]; + if (feature_group->is_multi_val_) { + if (!dense_only) { + feature_groups_contained.push_back(group); + } + } else if (!sparse_only) { + feature_groups_contained.push_back(group); + } + } + num_total_bin_ += bin->num_bin(); + num_elements_per_row_ += bin->num_element_per_row(); + multi_val_bin_wrapper_.reset(new MultiValBinWrapper( + bin, num_data, feature_groups_contained)); +} + +} // namespace LightGBM diff --git a/src/treelearner/data_parallel_tree_learner.cpp b/src/treelearner/data_parallel_tree_learner.cpp index 46ee2aabad2d..549c7a96d59a 100644 --- a/src/treelearner/data_parallel_tree_learner.cpp +++ b/src/treelearner/data_parallel_tree_learner.cpp @@ -30,7 +30,7 @@ void DataParallelTreeLearner::Init(const Dataset* train_data, boo auto max_cat_threshold = this->config_->max_cat_threshold; // need to be able to hold smaller and larger best splits in SyncUpGlobalBestSplit size_t split_info_size = static_cast(SplitInfo::Size(max_cat_threshold) * 2); - size_t histogram_size = static_cast(this->train_data_->NumTotalBin() * kHistEntrySize); + size_t histogram_size = static_cast(this->share_state_->num_hist_total_bin() * kHistEntrySize); // allocate buffer for communication size_t buffer_size = std::max(histogram_size, split_info_size); diff --git 
a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index 5d7595c35c3b..2fd74fd2b000 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -1173,36 +1173,9 @@ class HistogramPool { } } - static int GetNumTotalHistogramBins(const Dataset* train_data, - bool is_hist_colwise, std::vector* offsets) { - int num_total_bin = static_cast(train_data->NumTotalBin()); - offsets->clear(); - if (is_hist_colwise) { - int offset = 0; - for (int j = 0; j < train_data->num_features(); ++j) { - offset += train_data->SubFeatureBinOffset(j); - offsets->push_back(offset); - auto num_bin = train_data->FeatureNumBin(j); - if (train_data->FeatureBinMapper(j)->GetMostFreqBin() == 0) { - num_bin -= 1; - } - offset += num_bin; - } - } else { - num_total_bin = 1; - for (int j = 0; j < train_data->num_features(); ++j) { - offsets->push_back(num_total_bin); - num_total_bin += train_data->FeatureBinMapper(j)->num_bin(); - if (train_data->FeatureBinMapper(j)->GetMostFreqBin() == 0) { - num_total_bin -= 1; - } - } - } - return num_total_bin; - } - - void DynamicChangeSize(const Dataset* train_data, bool is_hist_colwise, - const Config* config, int cache_size, int total_size) { + void DynamicChangeSize(const Dataset* train_data, int num_total_bin, + const std::vector& offsets, const Config* config, + int cache_size, int total_size) { if (feature_metas_.empty()) { SetFeatureInfo(train_data, config, &feature_metas_); uint64_t bin_cnt_over_features = 0; @@ -1219,9 +1192,6 @@ class HistogramPool { pool_.resize(cache_size); data_.resize(cache_size); } - std::vector offsets; - int num_total_bin = - this->GetNumTotalHistogramBins(train_data, is_hist_colwise, &offsets); OMP_INIT_EX(); #pragma omp parallel for schedule(static) for (int i = old_cache_size; i < cache_size; ++i) { diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp index 1dd296318fab..cfa7d9b38b99 100644 --- 
a/src/treelearner/serial_tree_learner.cpp +++ b/src/treelearner/serial_tree_learner.cpp @@ -60,7 +60,10 @@ void SerialTreeLearner::Init(const Dataset* train_data, bool is_constant_hessian ordered_hessians_.resize(num_data_); GetShareStates(train_data_, is_constant_hessian, true); - histogram_pool_.DynamicChangeSize(train_data_, share_state_->is_colwise, config_, max_cache_size, config_->num_leaves); + histogram_pool_.DynamicChangeSize(train_data_, + share_state_->num_hist_total_bin(), + share_state_->feature_hist_offsets(), + config_, max_cache_size, config_->num_leaves); Log::Info("Number of data points in the train set: %d, number of used features: %d", num_data_, num_features_); if (CostEfficientGradientBoosting::IsEnable(config_)) { cegb_.reset(new CostEfficientGradientBoosting(this)); @@ -81,8 +84,8 @@ void SerialTreeLearner::GetShareStates(const Dataset* dataset, // cannot change is_hist_col_wise during training share_state_.reset(dataset->GetShareStates( ordered_gradients_.data(), ordered_hessians_.data(), col_sampler_.is_feature_used_bytree(), - is_constant_hessian, share_state_->is_colwise, - !share_state_->is_colwise)); + is_constant_hessian, share_state_->is_col_wise, + !share_state_->is_col_wise)); } CHECK_NOTNULL(share_state_); } @@ -130,7 +133,10 @@ void SerialTreeLearner::ResetConfig(const Config* config) { // at least need 2 leaves max_cache_size = std::max(2, max_cache_size); max_cache_size = std::min(max_cache_size, config_->num_leaves); - histogram_pool_.DynamicChangeSize(train_data_, share_state_->is_colwise, config_, max_cache_size, config_->num_leaves); + histogram_pool_.DynamicChangeSize(train_data_, + share_state_->num_hist_total_bin(), + share_state_->feature_hist_offsets(), + config_, max_cache_size, config_->num_leaves); // push split information for all leaves best_split_per_leaf_.resize(config_->num_leaves); @@ -351,7 +357,6 @@ void SerialTreeLearner::ConstructHistograms( smaller_leaf_splits_->num_data_in_leaf(), gradients_, hessians_, 
ordered_gradients_.data(), ordered_hessians_.data(), share_state_.get(), ptr_smaller_leaf_hist_data); - if (larger_leaf_histogram_array_ != nullptr && !use_subtract) { // construct larger leaf hist_t* ptr_larger_leaf_hist_data = diff --git a/src/treelearner/serial_tree_learner.h b/src/treelearner/serial_tree_learner.h index ab254ed1202a..fd06c235a8bf 100644 --- a/src/treelearner/serial_tree_learner.h +++ b/src/treelearner/serial_tree_learner.h @@ -84,11 +84,11 @@ class SerialTreeLearner: public TreeLearner { void SetBaggingData(const Dataset* subset, const data_size_t* used_indices, data_size_t num_data) override { if (subset == nullptr) { data_partition_->SetUsedDataIndices(used_indices, num_data); - share_state_->is_use_subrow = false; + share_state_->SetUseSubrow(false); } else { ResetTrainingDataInner(subset, share_state_->is_constant_hessian, false); - share_state_->is_use_subrow = true; - share_state_->is_subrow_copied = false; + share_state_->SetUseSubrow(true); + share_state_->SetSubrowCopied(false); share_state_->bagging_use_indices = used_indices; share_state_->bagging_indices_cnt = num_data; } diff --git a/src/treelearner/voting_parallel_tree_learner.cpp b/src/treelearner/voting_parallel_tree_learner.cpp index 430f47e07e17..ab4adeae62d9 100644 --- a/src/treelearner/voting_parallel_tree_learner.cpp +++ b/src/treelearner/voting_parallel_tree_learner.cpp @@ -67,9 +67,8 @@ void VotingParallelTreeLearner::Init(const Dataset* train_data, b // initialize histograms for global smaller_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]); larger_leaf_histogram_array_global_.reset(new FeatureHistogram[this->num_features_]); - std::vector offsets; - int num_total_bin = HistogramPool::GetNumTotalHistogramBins( - train_data, this->share_state_->is_colwise, &offsets); + std::vector offsets = this->share_state_->feature_hist_offsets(); + int num_total_bin = this->share_state_->num_hist_total_bin(); 
smaller_leaf_histogram_data_.resize(num_total_bin * 2); larger_leaf_histogram_data_.resize(num_total_bin * 2); HistogramPool::SetFeatureInfo(train_data, this->config_, &feature_metas_); diff --git a/windows/LightGBM.vcxproj b/windows/LightGBM.vcxproj index 4c95af83b03c..6b37ca53b728 100644 --- a/windows/LightGBM.vcxproj +++ b/windows/LightGBM.vcxproj @@ -309,6 +309,7 @@ + diff --git a/windows/LightGBM.vcxproj.filters b/windows/LightGBM.vcxproj.filters index 9490e3655387..b721af3fb187 100644 --- a/windows/LightGBM.vcxproj.filters +++ b/windows/LightGBM.vcxproj.filters @@ -317,5 +317,8 @@ src\io + + src\io + \ No newline at end of file