Optimization of row-wise histogram construction #3522

Merged (61 commits, Nov 13, 2020)

Changes from 44 commits

Commits
bfcffab: store without offset in multi_val_dense_bin (shiyu1994, Sep 17, 2020)
cffde72: fix offset bug (shiyu1994, Sep 17, 2020)
fc6daf2: add comment for offset (shiyu1994, Sep 17, 2020)
788101e: add comment for bin type selection (shiyu1994, Sep 17, 2020)
ed70174: faster operations for offset (shiyu1994, Sep 18, 2020)
fab5cb1: keep most freq bin in histogram for multi val dense (shiyu1994, Sep 18, 2020)
8e011e2: use original feature iterators (shiyu1994, Sep 22, 2020)
5f40fe5: consider 9 cases (3 x 3) for multi val bin construction (shiyu1994, Sep 23, 2020)
a9425a6: merge master (Oct 29, 2020)
4232b6c: fix dense bin setting (shiyu1994, Oct 30, 2020)
be26f9f: fix bin data in multi val group (shiyu1994, Nov 1, 2020)
a36aba6: fix offset of the first feature histogram (shiyu1994, Nov 2, 2020)
0a49aa3: Merge branch 'master' of https://github.com/microsoft/LightGBM into m… (shiyu1994, Nov 2, 2020)
3effcac: use float hist buf (shiyu1994, Nov 2, 2020)
ac501c5: avx in histogram construction (shiyu1994, Nov 2, 2020)
2bd0b30: use avx for hist construction without prefetch (shiyu1994, Nov 2, 2020)
68042a6: vectorize bin extraction (shiyu1994, Nov 2, 2020)
4fab10d: use only 128 vec (shiyu1994, Nov 3, 2020)
2c8346b: use avx2 (shiyu1994, Nov 3, 2020)
6c87fec: use vectorization for sparse row wise (shiyu1994, Nov 3, 2020)
8aaf0cc: add bit size for multi val dense bin (shiyu1994, Nov 3, 2020)
3238061: float with no vectorization (shiyu1994, Nov 3, 2020)
dcde744: change multithreading strategy to dynamic (shiyu1994, Nov 4, 2020)
e411060: remove intrinsic header (shiyu1994, Nov 4, 2020)
6b007a1: fix dense multi val col copy (shiyu1994, Nov 4, 2020)
99dc025: remove bit size (shiyu1994, Nov 4, 2020)
23b8e82: use large enough block size when the bin number is large (shiyu1994, Nov 4, 2020)
afede0d: calc min block size by sparsity (shiyu1994, Nov 4, 2020)
6263f03: rescale gradients (shiyu1994, Nov 4, 2020)
f64bb8c: rollback gradients scaling (shiyu1994, Nov 4, 2020)
fc385f2: single precision histogram buffer as an option (shiyu1994, Nov 5, 2020)
44a6896: add float hist buffer with thread buffer (shiyu1994, Nov 5, 2020)
2449fda: fix setting zero in hist data (shiyu1994, Nov 6, 2020)
0ce28ed: fix hist begin pointer in tree learners (shiyu1994, Nov 6, 2020)
d525ca0: remove debug logs (shiyu1994, Nov 6, 2020)
bb17cfd: remove omp simd (shiyu1994, Nov 6, 2020)
994e04d: update Makevars of R-package (shiyu1994, Nov 6, 2020)
347e608: fix feature group binary storing (shiyu1994, Nov 6, 2020)
c19d52a: two row wise for double hist buffer (shiyu1994, Nov 10, 2020)
cef713f: add subfeature for two row wise (shiyu1994, Nov 10, 2020)
9529d02: remove useless code and fix two row wise (shiyu1994, Nov 11, 2020)
4d5fa18: refactor code (shiyu1994, Nov 11, 2020)
72c82d4: grouping the dense feature groups can get sparse multi val bin (shiyu1994, Nov 11, 2020)
1ede693: clean format problems (shiyu1994, Nov 11, 2020)
02994e4: one thread for two blocks in sep row wise (shiyu1994, Nov 12, 2020)
840c3a4: use ordered gradients for sep row wise (shiyu1994, Nov 12, 2020)
de20c25: fix grad ptr (shiyu1994, Nov 12, 2020)
286c593: ordered grad with combined block for sep row wise (shiyu1994, Nov 12, 2020)
0ebfa47: fix block threading (shiyu1994, Nov 12, 2020)
17c7ae6: use the same min block size (shiyu1994, Nov 12, 2020)
6ea41da: rollback share min block size (shiyu1994, Nov 12, 2020)
7315d21: remove logs (shiyu1994, Nov 12, 2020)
b7b03b5: Update src/io/dataset.cpp (shiyu1994, Nov 12, 2020)
e7e45ab: fix parameter description (shiyu1994, Nov 12, 2020)
605657c: Merge branch 'multi_val_opt' of https://github.com/shiyu1994/LightGBM… (shiyu1994, Nov 12, 2020)
d420208: remove sep_row_wise (shiyu1994, Nov 12, 2020)
d198bdb: remove check codes (shiyu1994, Nov 12, 2020)
54716e4: add check for empty multi val bin (shiyu1994, Nov 12, 2020)
8117a1f: fix lint error (shiyu1994, Nov 12, 2020)
56b5f93: rollback changes in config.h (shiyu1994, Nov 12, 2020)
1e10eb1: Apply suggestions from code review (guolinke, Nov 13, 2020)
1 change: 1 addition & 0 deletions R-package/src/Makevars.in
@@ -36,6 +36,7 @@ OBJECTS = \
io/json11.o \
io/metadata.o \
io/parser.o \
io/train_share_states.o \
io/tree.o \
metric/dcg_calculator.o \
metric/metric.o \
1 change: 1 addition & 0 deletions R-package/src/Makevars.win.in
@@ -37,6 +37,7 @@ OBJECTS = \
io/json11.o \
io/metadata.o \
io/parser.o \
io/train_share_states.o \
io/tree.o \
metric/dcg_calculator.o \
metric/metric.o \
12 changes: 8 additions & 4 deletions include/LightGBM/bin.h
@@ -399,6 +399,7 @@ class MultiValBin {

virtual double num_element_per_row() const = 0;

virtual const std::vector<uint32_t>& offsets() const = 0;

virtual void PushOneRow(int tid, data_size_t idx, const std::vector<uint32_t>& values) = 0;

@@ -408,7 +409,8 @@

virtual MultiValBin* CreateLike(data_size_t num_data, int num_bin,
int num_feature,
double estimate_element_per_row) const = 0;
double estimate_element_per_row,
const std::vector<uint32_t>& offsets) const = 0;

virtual void CopySubcol(const MultiValBin* full_bin,
const std::vector<int>& used_feature_index,
@@ -417,7 +419,7 @@
const std::vector<uint32_t>& delta) = 0;

virtual void ReSize(data_size_t num_data, int num_bin, int num_feature,
double estimate_element_per_row) = 0;
double estimate_element_per_row, const std::vector<uint32_t>& offsets) = 0;

virtual void CopySubrowAndSubcol(
const MultiValBin* full_bin, const data_size_t* used_indices,
@@ -447,13 +449,15 @@
virtual bool IsSparse() = 0;

static MultiValBin* CreateMultiValBin(data_size_t num_data, int num_bin,
int num_feature, double sparse_rate);
int num_feature, double sparse_rate, const std::vector<uint32_t>& offsets);

static MultiValBin* CreateMultiValDenseBin(data_size_t num_data, int num_bin,
int num_feature);
int num_feature, const std::vector<uint32_t>& offsets);

static MultiValBin* CreateMultiValSparseBin(data_size_t num_data, int num_bin, double estimate_element_per_row);

static constexpr double multi_val_bin_sparse_threshold = 0.25f;

virtual MultiValBin* Clone() = 0;
};

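A note on the dispatch: this diff shows only the factory signatures, but given the new offsets parameter and the 0.25 threshold above, the dense/sparse choice can be sketched as below. This is an illustration under those assumptions, not the body that lives in the implementation files.

// Sketch only: pick the multi-value bin representation from the sparse rate,
// assuming bin.h above is included. The elements-per-row estimate for the
// sparse case is illustrative.
MultiValBin* MultiValBin::CreateMultiValBin(data_size_t num_data, int num_bin,
                                            int num_feature, double sparse_rate,
                                            const std::vector<uint32_t>& offsets) {
  if (sparse_rate >= multi_val_bin_sparse_threshold) {
    // Sparse rows: size by the expected number of nonzero elements per row.
    const double estimate_element_per_row = (1.0 - sparse_rate) * num_feature;
    return CreateMultiValSparseBin(num_data, num_bin, estimate_element_per_row);
  }
  // Mostly dense rows: a dense layout needs the per-feature offsets up front.
  return CreateMultiValDenseBin(num_data, num_bin, num_feature, offsets);
}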
20 changes: 16 additions & 4 deletions include/LightGBM/config.h
@@ -235,8 +235,8 @@ struct Config {
// descl2 = the number of columns is large, or the total number of bins is large
// descl2 = ``num_threads`` is large, e.g. ``> 20``
// descl2 = you want to reduce memory cost
// desc = **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
// desc = **Note**: this parameter cannot be used at the same time with ``force_row_wise``, choose only one of them
// desc = **Note**: when ``force_col_wise``, ``force_row_wise`` and ``force_two_row_wise`` are all ``false``, LightGBM will first try all of them, and then use the fastest one. To remove the overhead of testing, set the fastest one to ``true`` manually
// desc = **Note**: this parameter cannot be used together with ``force_row_wise`` or ``force_two_row_wise``; choose only one of them
bool force_col_wise = false;

// desc = used only with ``cpu`` device type
@@ -246,10 +246,22 @@
// descl2 = ``num_threads`` is relatively small, e.g. ``<= 16``
// descl2 = you want to use small ``bagging_fraction`` or ``goss`` boosting to speed up
// desc = **Note**: setting this to ``true`` will double the memory cost for the Dataset object. If you do not have enough memory, you can try setting ``force_col_wise=true``
// desc = **Note**: when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will firstly try them both, and then use the faster one. To remove the overhead of testing set the faster one to ``true`` manually
// desc = **Note**: this parameter cannot be used at the same time with ``force_col_wise``, choose only one of them
// desc = **Note**: when ``force_col_wise``, ``force_row_wise`` and ``force_two_row_wise`` are all ``false``, LightGBM will first try all of them, and then use the fastest one. To remove the overhead of testing, set the fastest one to ``true`` manually
// desc = **Note**: this parameter cannot be used together with ``force_col_wise`` or ``force_two_row_wise``; choose only one of them
bool force_row_wise = false;

// desc = used only with ``cpu`` device type
// desc = set this to ``true`` to force separate row-wise histogram building for sparse and dense features
// desc = enabling this is recommended when:
// descl2 = there are both sparse and dense features in the dataset
// descl2 = the number of data points is large, and the total number of bins is relatively small
// descl2 = ``num_threads`` is relatively small, e.g. ``<= 16``
// descl2 = you want to use small ``bagging_fraction`` or ``goss`` boosting to speed up
// desc = **Note**: setting this to ``true`` will double the memory cost for the Dataset object. If you do not have enough memory, you can try setting ``force_col_wise=true``
// desc = **Note**: when ``force_col_wise``, ``force_row_wise`` and ``force_two_row_wise`` are all ``false``, LightGBM will first try all of them, and then use the fastest one. To remove the overhead of testing, set the fastest one to ``true`` manually
// desc = **Note**: this parameter cannot be used together with ``force_col_wise`` or ``force_row_wise``; choose only one of them
bool force_two_row_wise = false;
Collaborator review comment on this line: maybe ``force_sep_row_wise``?

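Taken together, the three ``force_*`` descriptions above imply a simple contract: at most one layout may be forced, and with none forced LightGBM times each layout and keeps the fastest. A minimal sketch of that contract follows; the enum, the function, and the BenchmarkAndPickFastest helper are hypothetical names for illustration, not LightGBM's actual control flow.

#include <stdexcept>

enum class HistLayout { kColWise, kRowWise, kTwoRowWise };

HistLayout BenchmarkAndPickFastest();  // hypothetical: time one iteration per layout

HistLayout ChooseLayout(bool force_col_wise, bool force_row_wise,
                        bool force_two_row_wise) {
  const int n_forced = static_cast<int>(force_col_wise) +
                       static_cast<int>(force_row_wise) +
                       static_cast<int>(force_two_row_wise);
  if (n_forced > 1) {
    // "choose only one of them"
    throw std::runtime_error("only one of force_col_wise, force_row_wise, "
                             "force_two_row_wise may be true");
  }
  if (force_col_wise) return HistLayout::kColWise;
  if (force_row_wise) return HistLayout::kRowWise;
  if (force_two_row_wise) return HistLayout::kTwoRowWise;
  // None forced: try all layouts and use the fastest, paying a one-time
  // testing overhead (which the config notes suggest removing by forcing one).
  return BenchmarkAndPickFastest();
}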
// alias = hist_pool_size
// desc = max cache size in MB for historical histogram
// desc = ``< 0`` means no limit
60 changes: 6 additions & 54 deletions include/LightGBM/dataset.h
@@ -8,6 +8,7 @@
#include <LightGBM/config.h>
#include <LightGBM/feature_group.h>
#include <LightGBM/meta.h>
#include <LightGBM/train_share_states.h>
#include <LightGBM/utils/openmp_wrapper.h>
#include <LightGBM/utils/random.h>
#include <LightGBM/utils/text_reader.h>
@@ -275,57 +276,6 @@ class Parser {
static Parser* CreateParser(const char* filename, bool header, int num_features, int label_idx);
};

struct TrainingShareStates {
int num_threads = 0;
bool is_colwise = true;
bool is_use_subcol = false;
bool is_use_subrow = false;
bool is_subrow_copied = false;
bool is_constant_hessian = true;
const data_size_t* bagging_use_indices;
data_size_t bagging_indices_cnt;
int num_bin_aligned;
std::unique_ptr<MultiValBin> multi_val_bin;
std::unique_ptr<MultiValBin> multi_val_bin_subset;
std::vector<uint32_t> hist_move_src;
std::vector<uint32_t> hist_move_dest;
std::vector<uint32_t> hist_move_size;
std::vector<hist_t, Common::AlignmentAllocator<hist_t, kAlignedSize>>
hist_buf;

void SetMultiValBin(MultiValBin* bin) {
num_threads = OMP_NUM_THREADS();
if (bin == nullptr) {
return;
}
multi_val_bin.reset(bin);
num_bin_aligned =
(bin->num_bin() + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
size_t new_size = static_cast<size_t>(num_bin_aligned) * 2 * num_threads;
if (new_size > hist_buf.size()) {
hist_buf.resize(static_cast<size_t>(num_bin_aligned) * 2 * num_threads);
}
}

hist_t* TempBuf() {
if (!is_use_subcol) {
return nullptr;
}
return hist_buf.data() + hist_buf.size() - num_bin_aligned * 2;
}

void HistMove(const hist_t* src, hist_t* dest) {
if (!is_use_subcol) {
return;
}
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(hist_move_src.size()); ++i) {
std::copy_n(src + hist_move_src[i], hist_move_size[i],
dest + hist_move_dest[i]);
}
}
};

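The struct removed above now lives in its own translation unit (hence the new io/train_share_states.o entries in both Makevars files). Its buffer sizing rounds the bin count up to a multiple of kAlignedSize; a self-contained sketch of that arithmetic, with assumed values for the constants:

#include <cstddef>
#include <cstdio>

int main() {
  const int kAlignedSize = 32;   // assumed alignment, for illustration only
  const int num_bin = 1000;      // assumed total bin count
  // Round num_bin up to the next multiple of kAlignedSize, as in SetMultiValBin().
  const int num_bin_aligned =
      (num_bin + kAlignedSize - 1) / kAlignedSize * kAlignedSize;
  const int num_threads = 8;     // assumed OMP_NUM_THREADS()
  // Two hist_t slots per bin (gradient and hessian), one stripe per thread.
  const size_t hist_buf_size =
      static_cast<size_t>(num_bin_aligned) * 2 * num_threads;
  std::printf("%d %zu\n", num_bin_aligned, hist_buf_size);  // prints "1024 16384"
}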
/*! \brief The main class of data set,
 * which is used for training or validation
*/
@@ -444,14 +394,16 @@

void CopySubrow(const Dataset* fullset, const data_size_t* used_indices, data_size_t num_used_indices, bool need_meta_data);

MultiValBin* GetMultiBinFromSparseFeatures() const;
MultiValBin* GetMultiBinFromSparseFeatures(const std::vector<uint32_t>& offsets) const;

MultiValBin* GetMultiBinFromDenseFeatures(const std::vector<uint32_t>& offsets) const;

MultiValBin* GetMultiBinFromAllFeatures() const;
MultiValBin* GetMultiBinFromAllFeatures(const std::vector<uint32_t>& offsets) const;

TrainingShareStates* GetShareStates(
score_t* gradients, score_t* hessians,
const std::vector<int8_t>& is_feature_used, bool is_constant_hessian,
bool force_colwise, bool force_rowwise) const;
bool force_colwise, bool force_rowwise, bool force_two_rowwise) const;

LIGHTGBM_EXPORT void FinishLoad();

62 changes: 56 additions & 6 deletions include/LightGBM/feature_group.h
@@ -18,12 +18,16 @@ namespace LightGBM {

class Dataset;
class DatasetLoader;
class TrainingShareStates;
class MultiValBinWrapper;
/*! \brief Used to store data and provide some operations on one feature
 * group */
class FeatureGroup {
public:
friend Dataset;
friend DatasetLoader;
friend TrainingShareStates;
friend MultiValBinWrapper;
/*!
* \brief Constructor
* \param num_feature number of features of this group
@@ -35,15 +39,27 @@
std::vector<std::unique_ptr<BinMapper>>* bin_mappers,
data_size_t num_data) : num_feature_(num_feature), is_multi_val_(is_multi_val > 0), is_sparse_(false) {
CHECK_EQ(static_cast<int>(bin_mappers->size()), num_feature);
// use bin at zero to store most_freq_bin
num_total_bin_ = 1;
bin_offsets_.emplace_back(num_total_bin_);
auto& ref_bin_mappers = *bin_mappers;
double sum_sparse_rate = 0.0f;
for (int i = 0; i < num_feature_; ++i) {
bin_mappers_.emplace_back(ref_bin_mappers[i].release());
sum_sparse_rate += bin_mappers_.back()->sparse_rate();
}
sum_sparse_rate /= num_feature_;
int offset = 1;
is_dense_multi_val_ = false;
if (sum_sparse_rate < MultiValBin::multi_val_bin_sparse_threshold && is_multi_val_) {
// use dense multi val bin
offset = 0;
is_dense_multi_val_ = true;
}
// use bin at zero to store most_freq_bin only when not using dense multi val bin
num_total_bin_ = offset;
bin_offsets_.emplace_back(num_total_bin_);
for (int i = 0; i < num_feature_; ++i) {
auto num_bin = bin_mappers_[i]->num_bin();
if (bin_mappers_[i]->GetMostFreqBin() == 0) {
num_bin -= 1;
num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
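As a worked example of the offset logic above, take three hypothetical features with num_bin() = 10, 20 and 30, each with GetMostFreqBin() == 0. The sparse path (offset = 1) reserves bin zero at the group level and yields bin_offsets_ = {1, 10, 29, 58}; the dense path (offset = 0) yields {0, 10, 30, 60}. A self-contained sketch reproducing that computation:

#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> num_bins = {10, 20, 30};  // hypothetical bin counts
  for (int offset : {1, 0}) {  // 1: sparse multi-val bin, 0: dense multi-val bin
    int num_total_bin = offset;
    std::vector<int> bin_offsets = {num_total_bin};
    for (int num_bin : num_bins) {
      // most_freq_bin == 0 for every feature, so the group-level zero bin
      // absorbs it in the sparse case.
      num_bin -= offset;
      num_total_bin += num_bin;
      bin_offsets.push_back(num_total_bin);
    }
    for (int o : bin_offsets) std::printf("%d ", o);
    std::printf("\n");  // prints "1 10 29 58" then "0 10 30 60"
  }
}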
@@ -54,6 +70,7 @@
FeatureGroup(const FeatureGroup& other, int num_data) {
num_feature_ = other.num_feature_;
is_multi_val_ = other.is_multi_val_;
is_dense_multi_val_ = other.is_dense_multi_val_;
is_sparse_ = other.is_sparse_;
num_total_bin_ = other.num_total_bin_;
bin_offsets_ = other.bin_offsets_;
@@ -70,6 +87,7 @@
CHECK_EQ(static_cast<int>(bin_mappers->size()), 1);
// use bin at zero to store default_bin
num_total_bin_ = 1;
is_dense_multi_val_ = false;
bin_offsets_.emplace_back(num_total_bin_);
auto& ref_bin_mappers = *bin_mappers;
for (int i = 0; i < num_feature_; ++i) {
@@ -96,6 +114,8 @@
// get is_sparse
is_multi_val_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_multi_val_));
is_dense_multi_val_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_dense_multi_val_));
is_sparse_ = *(reinterpret_cast<const bool*>(memory_ptr));
memory_ptr += VirtualFileWriter::AlignedSize(sizeof(is_sparse_));
num_feature_ = *(reinterpret_cast<const int*>(memory_ptr));
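The deserializer above and the SaveBinaryToFile/SizesInByte pair later in this diff must agree on the padded width of each field, which is what VirtualFileWriter::AlignedSize provides. A minimal sketch of that idiom; the 8-byte alignment is an assumption for illustration, not taken from the diff:

#include <cstddef>
#include <cstdio>

// Pad each serialized field to a fixed alignment so the reader can advance
// its pointer by exactly the amount the writer emitted.
constexpr size_t AlignedSize(size_t bytes, size_t alignment = 8) {
  return (bytes + alignment - 1) / alignment * alignment;
}

int main() {
  std::printf("%zu %zu\n", AlignedSize(sizeof(bool)), AlignedSize(sizeof(int)));
  // prints "8 8": a 1-byte bool and a 4-byte int each occupy one aligned slot
}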
@@ -193,15 +213,41 @@
void AddFeaturesFrom(const FeatureGroup* other) {
CHECK(is_multi_val_);
CHECK(other->is_multi_val_);
// every time when new features are added, we need to reconsider sparse or dense
double sum_sparse_rate = 0.0f;
for (int i = 0; i < num_feature_; ++i) {
sum_sparse_rate += bin_mappers_[i]->sparse_rate();
}
for (int i = 0; i < other->num_feature_; ++i) {
sum_sparse_rate += other->bin_mappers_[i]->sparse_rate();
}
sum_sparse_rate /= (num_feature_ + other->num_feature_);
int offset = 1;
is_dense_multi_val_ = false;
if (sum_sparse_rate < MultiValBin::multi_val_bin_sparse_threshold && is_multi_val_) {
// use dense multi val bin
offset = 0;
is_dense_multi_val_ = true;
}
bin_offsets_.clear();
num_total_bin_ = offset;
bin_offsets_.emplace_back(num_total_bin_);
for (int i = 0; i < num_feature_; ++i) {
auto num_bin = bin_mappers_[i]->num_bin();
if (bin_mappers_[i]->GetMostFreqBin() == 0) {
num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
}
for (int i = 0; i < other->num_feature_; ++i) {
const auto& other_bin_mapper = other->bin_mappers_[i];
bin_mappers_.emplace_back(new BinMapper(*other_bin_mapper));
auto num_bin = other_bin_mapper->num_bin();
if (other_bin_mapper->GetMostFreqBin() == 0) {
num_bin -= 1;
num_bin -= offset;
}
num_total_bin_ += num_bin;
bin_offsets_.emplace_back(num_total_bin_);
multi_bin_data_.emplace_back(other->multi_bin_data_[i]->Clone());
}
num_feature_ += other->num_feature_;
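A worked example of the recomputation above, with hypothetical sparse rates: a group holding features with rates 0.4 and 0.3 averages 0.35 and stays sparse; after AddFeaturesFrom brings in a fully dense feature (rate 0.0), the average drops to about 0.233, below the 0.25 threshold, so the merged group switches to the dense multi-value representation:

#include <cstdio>

int main() {
  const double kThreshold = 0.25;  // MultiValBin::multi_val_bin_sparse_threshold
  const double own_rates[] = {0.4, 0.3};  // hypothetical existing features
  const double other_rates[] = {0.0};     // hypothetical incoming dense feature
  double sum = 0.0;
  int n = 0;
  for (double r : own_rates) { sum += r; ++n; }
  for (double r : other_rates) { sum += r; ++n; }
  const double avg = sum / n;  // (0.4 + 0.3 + 0.0) / 3 = 0.2333...
  std::printf("avg = %.4f -> %s multi-val bin\n", avg,
              avg < kThreshold ? "dense" : "sparse");  // dense
}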
@@ -321,6 +367,7 @@
*/
void SaveBinaryToFile(const VirtualFileWriter* writer) const {
writer->AlignedWrite(&is_multi_val_, sizeof(is_multi_val_));
writer->AlignedWrite(&is_dense_multi_val_, sizeof(is_dense_multi_val_));
writer->AlignedWrite(&is_sparse_, sizeof(is_sparse_));
writer->AlignedWrite(&num_feature_, sizeof(num_feature_));
for (int i = 0; i < num_feature_; ++i) {
@@ -340,6 +387,7 @@
*/
size_t SizesInByte() const {
size_t ret = VirtualFileWriter::AlignedSize(sizeof(is_multi_val_)) +
VirtualFileWriter::AlignedSize(sizeof(is_dense_multi_val_)) +
VirtualFileWriter::AlignedSize(sizeof(is_sparse_)) +
VirtualFileWriter::AlignedSize(sizeof(num_feature_));
for (int i = 0; i < num_feature_; ++i) {
@@ -362,6 +410,7 @@
FeatureGroup(const FeatureGroup& other) {
num_feature_ = other.num_feature_;
is_multi_val_ = other.is_multi_val_;
is_dense_multi_val_ = other.is_dense_multi_val_;
is_sparse_ = other.is_sparse_;
num_total_bin_ = other.num_total_bin_;
bin_offsets_ = other.bin_offsets_;
@@ -420,6 +469,7 @@
std::vector<std::unique_ptr<Bin>> multi_bin_data_;
/*! \brief True if this feature is sparse */
bool is_multi_val_;
bool is_dense_multi_val_;
bool is_sparse_;
int num_total_bin_;
};