Support both row-wise and col-wise multi-threading (#2699)
* commit

* fix a bug

* fix bug

* reset to track changes

* refine the auto choose logic

* sort the time stats output

* fix include

* change  multi_val_bin_sparse_threshold

* add cmake

* add _mm_malloc and _mm_free for cross platform

* fix cmake bug

* timer for split

* try to fix cmake

* fix tests

* refactor DataPartition::Split

* fix test

* typo

* formating

* Revert "formating"

This reverts commit 5b8de4f.

* add document

* [R-package] Added tests on use of force_col_wise and force_row_wise in training (#2719)

* naming

* fix gpu code

* Update include/LightGBM/bin.h

Co-Authored-By: James Lamb <jaylamb20@gmail.com>

* Update src/treelearner/ocl/histogram16.cl

* test: swap compilers for CI

* fix omp

* not avx2

* no aligned for feature histogram

* Revert "refactor DataPartition::Split"

This reverts commit 256e6d9.

* slightly refactor data partition

* reduce the memory cost

Co-authored-by: James Lamb <jaylamb20@gmail.com>
Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
3 people authored Feb 2, 2020
1 parent bc7bc4a commit 509c2e5
Showing 50 changed files with 2,195 additions and 1,499 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
@@ -44,11 +44,11 @@ before_install:
- export BUILD_DIRECTORY="$TRAVIS_BUILD_DIR"
- if [[ $TRAVIS_OS_NAME == "osx" ]]; then
export OS_NAME="macos";
-export COMPILER="gcc";
+export COMPILER="clang";
export R_MAC_VERSION=3.6.1;
else
export OS_NAME="linux";
-export COMPILER="clang";
+export COMPILER="gcc";
export R_TRAVIS_LINUX_VERSION=3.6.1-3bionic;
fi
- export CONDA="$HOME/miniconda"
4 changes: 2 additions & 2 deletions .vsts-ci.yml
@@ -17,7 +17,7 @@ jobs:
- job: Linux
###########################################
variables:
-COMPILER: gcc
+COMPILER: clang
pool:
vmImage: 'ubuntu-16.04'
container: ubuntu1404
@@ -72,7 +72,7 @@ jobs:
- job: MacOS
###########################################
variables:
-COMPILER: clang
+COMPILER: gcc
pool:
vmImage: 'macOS-10.13'
strategy:
19 changes: 19 additions & 0 deletions CMakeLists.txt
@@ -68,6 +68,10 @@ if(USE_R35)
ADD_DEFINITIONS(-DR_VER_ABOVE_35)
endif(USE_R35)

if(USE_TIMETAG)
ADD_DEFINITIONS(-DTIMETAG)
endif(USE_TIMETAG)

if(USE_MPI)
find_package(MPI REQUIRED)
ADD_DEFINITIONS(-DUSE_MPI)
@@ -130,6 +134,21 @@ if(${MM_PREFETCH})
ADD_DEFINITIONS(-DMM_PREFETCH)
endif()

include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
#include <mm_malloc.h>
int main() {
char *a = (char*)_mm_malloc(8, 16);
_mm_free(a);
return 0;
}
" MM_MALLOC)

if(${MM_MALLOC})
message(STATUS "Use _mm_malloc")
ADD_DEFINITIONS(-DMM_MALLOC)
endif()

if(UNIX OR MINGW OR CYGWIN)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread -O3 -Wextra -Wall -Wno-ignored-attributes -Wno-unknown-pragmas -Wno-return-type")
if(USE_SWIG)
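
An aside before the test changes: the MM_MALLOC define introduced by the check above matches the "add _mm_malloc and _mm_free for cross platform" entry in the commit log. A hedged C++ sketch of how such a define could gate an aligned-allocation wrapper (``AlignedAlloc``/``AlignedFree`` are hypothetical names, not necessarily what the commit uses):

#include <cstddef>
#include <cstdlib>
#ifdef MM_MALLOC
#include <mm_malloc.h>
#endif

inline void* AlignedAlloc(std::size_t size, std::size_t alignment) {
#ifdef MM_MALLOC
  return _mm_malloc(size, alignment);  // aligned allocation is available
#else
  (void)alignment;                     // portable fallback: unaligned
  return std::malloc(size);
#endif
}

inline void AlignedFree(void* ptr) {
#ifdef MM_MALLOC
  _mm_free(ptr);                       // must pair with _mm_malloc
#else
  std::free(ptr);
#endif
}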
43 changes: 43 additions & 0 deletions R-package/tests/testthat/test_basic.R
@@ -252,3 +252,46 @@ test_that("lgb.train() throws an informative error if 'valids' contains lgb.Data
)
}, regexp = "each element of valids must have a name")
})

test_that("lgb.train() works with force_col_wise and force_row_wise", {
set.seed(1234L)
nrounds <- 10L
dtrain <- lgb.Dataset(
train$data
, label = train$label
)
params <- list(
objective = "binary"
, metric = "binary_error"
, force_col_wise = TRUE
)
bst_colwise <- lgb.train(
params = params
, data = dtrain
, nrounds = nrounds
)

params <- list(
objective = "binary"
, metric = "binary_error"
, force_row_wise = TRUE
)
bst_row_wise <- lgb.train(
params = params
, data = dtrain
, nrounds = nrounds
)

expected_error <- 0.003070782
expect_equal(bst_colwise$eval_train()[[1L]][["value"]], expected_error)
expect_equal(bst_row_wise$eval_train()[[1L]][["value"]], expected_error)

# check some basic details of the boosters just to be sure force_col_wise
# and force_row_wise are not causing any weird side effects
for (bst in list(bst_row_wise, bst_colwise)) {
expect_equal(bst$current_iter(), nrounds)
parsed_model <- jsonlite::fromJSON(bst$dump_model())
expect_equal(parsed_model$objective, "binary sigmoid:1")
expect_false(parsed_model$average_output)
}
})
4 changes: 2 additions & 2 deletions R-package/tests/testthat/test_learning_to_rank.R
@@ -47,8 +47,8 @@ test_that("learning-to-rank with lgb.train() works as expected", {
}
expect_identical(sapply(eval_results, function(x) {x$name}), eval_names)
expect_equal(eval_results[[1L]][["value"]], 0.825)
-expect_true(abs(eval_results[[2L]][["value"]] - 0.795986) < TOLERANCE)
-expect_true(abs(eval_results[[3L]][["value"]] - 0.7734639) < TOLERANCE)
+expect_true(abs(eval_results[[2L]][["value"]] - 0.7766434) < TOLERANCE)
+expect_true(abs(eval_results[[3L]][["value"]] - 0.7527939) < TOLERANCE)
})

test_that("learning-to-rank with lgb.cv() works as expected", {
48 changes: 32 additions & 16 deletions docs/Parameters.rst
@@ -190,6 +190,38 @@ Core Parameters
Learning Control Parameters
---------------------------

- ``force_col_wise`` :raw-html:`<a id="force_col_wise" title="Permalink to this parameter" href="#force_col_wise">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool

- setting ``force_col_wise=true`` forces LightGBM to use the col-wise histogram build

- ``force_col_wise=true`` is recommended when:

- the number of columns is large, or the total number of bins is large

- ``num_threads`` is large, e.g. ``> 20``

- you want to use a small ``feature_fraction``, e.g. ``0.5``, to speed up training

- you want to reduce the memory cost

- when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will first try both and use the faster one

- ``force_row_wise`` :raw-html:`<a id="force_row_wise" title="Permalink to this parameter" href="#force_row_wise">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool

- setting ``force_row_wise=true`` forces LightGBM to use the row-wise histogram build

- ``force_row_wise=true`` is recommended when:

- the number of data points is large, and the total number of bins is relatively small

- you want to use a small ``bagging_fraction``, or ``goss``, to speed up training

- ``num_threads`` is relatively small, e.g. ``<= 16``

- setting ``force_row_wise=true`` will double the memory cost of the Dataset object; if you run out of memory, try ``force_col_wise=true`` instead

- when both ``force_col_wise`` and ``force_row_wise`` are ``false``, LightGBM will first try both and use the faster one (a sketch of this selection idea follows below).
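
For intuition only, here is a hedged C++ sketch of the "try both, keep the faster one" selection described above; the names (``HistMode``, ``TimeOnce``, ``ChooseHistMode``) are hypothetical and do not come from this commit:

#include <chrono>

enum class HistMode { kColWise, kRowWise };

// Run one histogram-construction pass and measure the wall-clock time.
template <typename BuildFn>
double TimeOnce(BuildFn&& build) {
  const auto start = std::chrono::steady_clock::now();
  build();
  return std::chrono::duration<double>(
      std::chrono::steady_clock::now() - start).count();
}

// Try the col-wise and row-wise builds once each on the first iteration
// and keep whichever finished faster.
template <typename ColFn, typename RowFn>
HistMode ChooseHistMode(ColFn&& build_col_wise, RowFn&& build_row_wise) {
  const double col_time = TimeOnce(build_col_wise);
  const double row_time = TimeOnce(build_row_wise);
  return col_time <= row_time ? HistMode::kColWise : HistMode::kRowWise;
}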

- ``max_depth`` :raw-html:`<a id="max_depth" title="Permalink to this parameter" href="#max_depth">&#x1F517;&#xFE0E;</a>`, default = ``-1``, type = int

- limit the max depth for tree model. This is used to deal with over-fitting when ``#data`` is small. Tree still grows leaf-wise
@@ -559,22 +591,6 @@ IO Parameters

- **Note**: disabling this may cause the slow training speed for sparse datasets

- ``max_conflict_rate`` :raw-html:`<a id="max_conflict_rate" title="Permalink to this parameter" href="#max_conflict_rate">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, constraints: ``0.0 <= max_conflict_rate < 1.0``

- max conflict rate for bundles in EFB

- set this to ``0.0`` to disallow the conflict and provide more accurate results

- set this to a larger value to achieve faster speed

- ``is_enable_sparse`` :raw-html:`<a id="is_enable_sparse" title="Permalink to this parameter" href="#is_enable_sparse">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool, aliases: ``is_sparse``, ``enable_sparse``, ``sparse``

- used to enable/disable sparse optimization

- ``sparse_threshold`` :raw-html:`<a id="sparse_threshold" title="Permalink to this parameter" href="#sparse_threshold">&#x1F517;&#xFE0E;</a>`, default = ``0.8``, type = double, constraints: ``0.0 < sparse_threshold <= 1.0``

- the threshold of zero elements percentage for treating a feature as a sparse one

- ``use_missing`` :raw-html:`<a id="use_missing" title="Permalink to this parameter" href="#use_missing">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool

- set this to ``false`` to disable the special handling of missing values
122 changes: 68 additions & 54 deletions include/LightGBM/bin.h
@@ -29,36 +29,29 @@ enum MissingType {
NaN
};

/*! \brief Store data for one histogram bin */
struct HistogramBinEntry {
public:
/*! \brief Sum of gradients on this bin */
double sum_gradients = 0.0f;
/*! \brief Sum of hessians on this bin */
double sum_hessians = 0.0f;
/*! \brief Number of data on this bin */
data_size_t cnt = 0;
/*!
* \brief Sum up (reducers) functions for histogram bin
*/
inline static void SumReducer(const char *src, char *dst, int type_size, comm_size_t len) {
comm_size_t used_size = 0;
const HistogramBinEntry* p1;
HistogramBinEntry* p2;
while (used_size < len) {
// convert
p1 = reinterpret_cast<const HistogramBinEntry*>(src);
p2 = reinterpret_cast<HistogramBinEntry*>(dst);
// add
p2->cnt += p1->cnt;
p2->sum_gradients += p1->sum_gradients;
p2->sum_hessians += p1->sum_hessians;
src += type_size;
dst += type_size;
used_size += type_size;
}
typedef double hist_t;

const size_t KHistEntrySize = 2 * sizeof(hist_t);
const int KHistOffset = 2;
const double kSparseThreshold = 0.7;

#define GET_GRAD(hist, i) hist[(i) << 1]
#define GET_HESS(hist, i) hist[((i) << 1) + 1]

inline static void HistogramSumReducer(const char* src, char* dst, int type_size, comm_size_t len) {
comm_size_t used_size = 0;
const hist_t* p1;
hist_t* p2;
while (used_size < len) {
// convert
p1 = reinterpret_cast<const hist_t*>(src);
p2 = reinterpret_cast<hist_t*>(dst);
*p2 += *p1;
src += type_size;
dst += type_size;
used_size += type_size;
}
};
}
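
To make the new layout concrete: a histogram is now a flat ``hist_t`` array holding one gradient/hessian pair per bin (hence ``KHistEntrySize = 2 * sizeof(hist_t)``), and the ``GET_GRAD``/``GET_HESS`` macros index into it. A self-contained illustration of that indexing follows; ``NaiveConstructHistogram`` and ``bin_of`` are hypothetical stand-ins, not code from this diff:

#include <cstddef>
#include <vector>

typedef double hist_t;
#define GET_GRAD(hist, i) hist[(i) << 1]
#define GET_HESS(hist, i) hist[((i) << 1) + 1]

// out must hold 2 * num_bins entries: [grad0, hess0, grad1, hess1, ...]
void NaiveConstructHistogram(const std::vector<int>& bin_of,
                             const std::vector<double>& gradients,
                             const std::vector<double>& hessians,
                             int num_bins, hist_t* out) {
  for (int i = 0; i < 2 * num_bins; ++i) out[i] = 0.0;
  for (std::size_t row = 0; row < bin_of.size(); ++row) {
    const int bin = bin_of[row];
    GET_GRAD(out, bin) += gradients[row];  // even slot: gradient sum
    GET_HESS(out, bin) += hessians[row];   // odd slot: hessian sum
  }
}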

/*! \brief This class used to convert feature values into bin,
* and store some meta information for bin*/
@@ -252,7 +245,7 @@ class OrderedBin {
* \param out Output Result
*/
virtual void ConstructHistogram(int leaf, const score_t* gradients,
-const score_t* hessians, HistogramBinEntry* out) const = 0;
+const score_t* hessians, hist_t* out) const = 0;

/*!
* \brief Construct histogram by using this bin
@@ -262,7 +255,7 @@
* \param gradients Gradients, Note:non-ordered by leaf
* \param out Output Result
*/
-virtual void ConstructHistogram(int leaf, const score_t* gradients, HistogramBinEntry* out) const = 0;
+virtual void ConstructHistogram(int leaf, const score_t* gradients, hist_t* out) const = 0;

/*!
* \brief Split current bin, and perform re-order by leaf
@@ -360,11 +353,11 @@ class Bin {
virtual void ConstructHistogram(
const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians,
-HistogramBinEntry* out) const = 0;
+hist_t* out) const = 0;

virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, const score_t* ordered_hessians,
-HistogramBinEntry* out) const = 0;
+hist_t* out) const = 0;

/*!
* \brief Construct histogram of this feature,
@@ -380,10 +373,10 @@
* \param out Output Result
*/
virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
-const score_t* ordered_gradients, HistogramBinEntry* out) const = 0;
+const score_t* ordered_gradients, hist_t* out) const = 0;

virtual void ConstructHistogram(data_size_t start, data_size_t end,
-const score_t* ordered_gradients, HistogramBinEntry* out) const = 0;
+const score_t* ordered_gradients, hist_t* out) const = 0;

/*!
* \brief Split data according to threshold, if bin <= threshold, will put into left(lte_indices), else put into right(gt_indices)
@@ -423,30 +416,11 @@ class Bin {
data_size_t* data_indices, data_size_t num_data,
data_size_t* lte_indices, data_size_t* gt_indices) const = 0;

/*!
* \brief Create the ordered bin for this bin
* \return Pointer to ordered bin
*/
virtual OrderedBin* CreateOrderedBin() const = 0;

/*!
* \brief After pushed all feature data, call this could have better refactor for bin data
*/
virtual void FinishLoad() = 0;

/*!
* \brief Create object for bin data of one feature, will call CreateDenseBin or CreateSparseBin according to "is_sparse"
* \param num_data Total number of data
* \param num_bin Number of bin
* \param sparse_rate Sparse rate of this bins( num_bin0/num_data )
* \param is_enable_sparse True if enable sparse feature
* \param sparse_threshold Threshold for treating a feature as a sparse feature
* \param is_sparse Will set to true if this bin is sparse
* \return The bin data object
*/
static Bin* CreateBin(data_size_t num_data, int num_bin,
double sparse_rate, bool is_enable_sparse, double sparse_threshold, bool* is_sparse);

/*!
* \brief Create object for bin data of one feature, used for dense feature
* \param num_data Total number of data
@@ -469,6 +443,46 @@
virtual Bin* Clone() = 0;
};


class MultiValBin {
public:

virtual ~MultiValBin() {}

virtual data_size_t num_data() const = 0;

virtual int32_t num_bin() const = 0;

virtual void ReSize(data_size_t num_data) = 0;

virtual void PushOneRow(int tid, data_size_t idx, const std::vector<uint32_t>& values) = 0;

virtual void CopySubset(const Bin* full_bin, const data_size_t* used_indices, data_size_t num_used_indices) = 0;

virtual void ConstructHistogram(
const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians,
hist_t* out) const = 0;

virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians,
hist_t* out) const = 0;

virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const = 0;

virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const = 0;

virtual void FinishLoad() = 0;

virtual bool IsSparse() = 0;

static MultiValBin* CreateMultiValBin(data_size_t num_data, int num_bin, int num_feature, double sparse_rate);

virtual MultiValBin* Clone() = 0;
};
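
The ``MultiValBin`` interface above is what enables the row-wise path: one object stores the bin values of many features per row, so a single sweep over the rows updates one shared histogram buffer. A hedged sketch of that access pattern, assuming a dense row-major layout with pre-offset bin values (``RowWiseConstructHistogram`` and its parameters are illustrative, not from this diff):

#include <cstddef>
#include <cstdint>
#include <vector>

typedef double hist_t;

// bins is row-major: row r occupies bins[r * k .. r * k + k - 1], and each
// stored value is assumed already offset into the shared histogram space.
void RowWiseConstructHistogram(const std::vector<uint32_t>& bins, int k,
                               const std::vector<double>& gradients,
                               const std::vector<double>& hessians,
                               hist_t* out) {
  const std::size_t num_rows = bins.size() / static_cast<std::size_t>(k);
  for (std::size_t row = 0; row < num_rows; ++row) {
    const uint32_t* row_bins = &bins[row * k];
    for (int j = 0; j < k; ++j) {
      // each bin owns two adjacent hist_t slots: gradient, then hessian
      hist_t* pos = out + (static_cast<std::size_t>(row_bins[j]) << 1);
      pos[0] += gradients[row];
      pos[1] += hessians[row];
    }
  }
}

Col-wise construction instead walks one feature's bin column at a time, which is why the docs above recommend it when there are many columns or bins and many threads.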

inline uint32_t BinMapper::ValueToBin(double value) const {
if (std::isnan(value)) {
if (missing_type_ == MissingType::NaN) {
