Small cleanup for histogram routines. #9427

Merged. 6 commits, merged Aug 2, 2023. Changes shown are from all commits.
1 change: 1 addition & 0 deletions R-package/src/Makevars.in
@@ -68,6 +68,7 @@ OBJECTS= \
$(PKGROOT)/src/tree/updater_quantile_hist.o \
$(PKGROOT)/src/tree/updater_refresh.o \
$(PKGROOT)/src/tree/updater_sync.o \
+$(PKGROOT)/src/tree/hist/param.o \
$(PKGROOT)/src/linear/linear_updater.o \
$(PKGROOT)/src/linear/updater_coordinate.o \
$(PKGROOT)/src/linear/updater_shotgun.o \
1 change: 1 addition & 0 deletions R-package/src/Makevars.win
@@ -68,6 +68,7 @@ OBJECTS= \
$(PKGROOT)/src/tree/updater_quantile_hist.o \
$(PKGROOT)/src/tree/updater_refresh.o \
$(PKGROOT)/src/tree/updater_sync.o \
+$(PKGROOT)/src/tree/hist/param.o \
$(PKGROOT)/src/linear/linear_updater.o \
$(PKGROOT)/src/linear/updater_coordinate.o \
$(PKGROOT)/src/linear/updater_shotgun.o \
4 changes: 3 additions & 1 deletion include/xgboost/linalg.h
@@ -574,7 +574,9 @@ template <typename Container, typename... S,
std::enable_if_t<!common::detail::IsSpan<Container>::value &&
!std::is_pointer_v<Container>> * = nullptr>
auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) { // NOLINT
-  using T = typename Container::value_type;
+  using T = std::conditional_t<std::is_const_v<Container>,
+                               std::add_const_t<typename Container::value_type>,
+                               typename Container::value_type>;
std::size_t in_shape[sizeof...(S)];
detail::IndexToArr(in_shape, std::forward<S>(shape)...);
return TensorView<T, sizeof...(S)>{data, in_shape, ctx->gpu_id};
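The change above makes MakeTensorView const-correct: when Container is deduced as a const type (for example std::vector<float> const), the element type T now carries const as well, so the returned TensorView is read-only instead of silently discarding constness. A minimal standalone sketch of the std::conditional_t idiom (illustrative alias name, not XGBoost code):

    #include <type_traits>
    #include <vector>

    // For a const container, element access must go through a const element type.
    template <typename Container>
    using ElemT = std::conditional_t<std::is_const_v<Container>,
                                     std::add_const_t<typename Container::value_type>,
                                     typename Container::value_type>;

    static_assert(std::is_same_v<ElemT<std::vector<float>>, float>);
    static_assert(std::is_same_v<ElemT<std::vector<float> const>, float const>);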
41 changes: 22 additions & 19 deletions src/common/hist_util.cc
@@ -81,11 +81,11 @@ void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end) {
/*!
* \brief Increment hist as dst += add in range [begin, end)
*/
-void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end) {
-  double* pdst = reinterpret_cast<double*>(dst.data());
+void IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end) {
+  double *pdst = reinterpret_cast<double *>(dst.data());
   const double *padd = reinterpret_cast<const double *>(add.data());

-  for (size_t i = 2 * begin; i < 2 * end; ++i) {
+  for (std::size_t i = 2 * begin; i < 2 * end; ++i) {
pdst[i] += padd[i];
}
}
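ConstGHistRow (introduced in hist_util.h below) is a Span over const GradientPairPrecise. The old parameter type const GHistRow only made the span object itself const; its elements stayed mutable, so the read-only intent was never enforced. Spans are cheap to copy, which is why the new signature also drops pass-by-reference. A minimal sketch of the shallow-const versus deep-const distinction, using C++20 std::span as a stand-in for xgboost's own Span type:

    #include <span>

    void shallow_const(const std::span<double> s) {
      s[0] = 1.0;  // compiles: const applies to the span object, not its elements
    }

    void deep_const(std::span<double const> s) {
      // s[0] = 1.0;  // rejected at compile time: elements are const
      double first = s.empty() ? 0.0 : s[0];  // reads remain fine
      (void)first;
    }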
@@ -207,18 +207,23 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,

const size_t size = row_indices.Size();
const size_t *rid = row_indices.begin;
-  auto const *pgh = reinterpret_cast<const float *>(gpair.data());
+  auto const *p_gpair = reinterpret_cast<const float *>(gpair.data());
const BinIdxType *gradient_index = gmat.index.data<BinIdxType>();

auto const &row_ptr = gmat.row_ptr.data();
auto base_rowid = gmat.base_rowid;
-  const uint32_t *offsets = gmat.index.Offset();
-  auto get_row_ptr = [&](size_t ridx) {
+  uint32_t const *offsets = gmat.index.Offset();
+  // There's no feature-based compression if missing value is present.
+  if (kAnyMissing) {
+    CHECK(!offsets);
+  } else {
+    CHECK(offsets);
+  }
+
+  auto get_row_ptr = [&](bst_row_t ridx) {
     return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
   };
-  auto get_rid = [&](size_t ridx) {
-    return kFirstPage ? ridx : (ridx - base_rowid);
-  };
+  auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };

const size_t n_features =
get_row_ptr(row_indices.begin[0] + 1) - get_row_ptr(row_indices.begin[0]);
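The two lambdas encode the kernel's addressing scheme: with no missing values the gradient index is dense (every row holds exactly n_features bin entries, plus a per-feature offsets table), so a row's slice starts at ridx * n_features; with missing values the index is CSR-like and row_ptr must be consulted, and no offsets table exists, which is exactly what the new CHECKs assert. A small illustrative sketch of the two modes (names mirror the kernel above; not a drop-in):

    #include <cstddef>
    #include <vector>

    std::size_t IcolStart(bool any_missing, std::size_t ridx, std::size_t n_features,
                          std::vector<std::size_t> const &row_ptr) {
      // CSR-like layout (missing values present): look up the row pointer.
      // Dense layout: the row slice is computable without indirection.
      return any_missing ? row_ptr[ridx] : ridx * n_features;
    }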
@@ -228,7 +233,7 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
// So we need to multiply each row-index/bin-index by 2
// to work with gradient pairs as a single row FP array

-  for (size_t i = 0; i < size; ++i) {
+  for (std::size_t i = 0; i < size; ++i) {
const size_t icol_start =
kAnyMissing ? get_row_ptr(rid[i]) : get_rid(rid[i]) * n_features;
const size_t icol_end =
@@ -246,7 +251,7 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
kAnyMissing ? get_row_ptr(rid[i + Prefetch::kPrefetchOffset] + 1)
: icol_start_prefetch + n_features;

-    PREFETCH_READ_T0(pgh + two * rid[i + Prefetch::kPrefetchOffset]);
+    PREFETCH_READ_T0(p_gpair + two * rid[i + Prefetch::kPrefetchOffset]);
for (size_t j = icol_start_prefetch; j < icol_end_prefetch;
j += Prefetch::GetPrefetchStep<uint32_t>()) {
PREFETCH_READ_T0(gradient_index + j);
@@ -255,12 +260,12 @@ void RowsWiseBuildHistKernel(Span<GradientPair const> gpair,
const BinIdxType *gr_index_local = gradient_index + icol_start;

// The trick with pgh_t buffer helps the compiler to generate faster binary.
-    const float pgh_t[] = {pgh[idx_gh], pgh[idx_gh + 1]};
+    const float pgh_t[] = {p_gpair[idx_gh], p_gpair[idx_gh + 1]};
for (size_t j = 0; j < row_size; ++j) {
-      const uint32_t idx_bin = two * (static_cast<uint32_t>(gr_index_local[j]) +
-                                      (kAnyMissing ? 0 : offsets[j]));
+      const uint32_t idx_bin =
+          two * (static_cast<uint32_t>(gr_index_local[j]) + (kAnyMissing ? 0 : offsets[j]));
auto hist_local = hist_data + idx_bin;
      *(hist_local) += pgh_t[0];
*(hist_local + 1) += pgh_t[1];
}
}
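As the comment above explains, a gradient pair is two packed floats, so the kernel views the gradient array as one flat float buffer and doubles every row and bin index. A compact standalone sketch of that addressing (the stand-in GradientPair is assumed to mirror xgboost::GradientPair's two-float layout):

    #include <cstddef>

    struct GradientPair { float grad; float hess; };  // stand-in for xgboost::GradientPair

    // hist is a flat double buffer: hist[2 * bin] accumulates gradients and
    // hist[2 * bin + 1] accumulates hessians, matching idx_gh = two * row above.
    void Accumulate(GradientPair const *gpair, std::size_t row, double *hist, std::size_t bin) {
      auto const *flat = reinterpret_cast<float const *>(gpair);
      std::size_t idx_gh = 2 * row;
      hist[2 * bin] += flat[idx_gh];          // gradient
      hist[2 * bin + 1] += flat[idx_gh + 1];  // hessian
    }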
@@ -281,12 +286,10 @@ void ColsWiseBuildHistKernel(Span<GradientPair const> gpair,
auto const &row_ptr = gmat.row_ptr.data();
auto base_rowid = gmat.base_rowid;
const uint32_t *offsets = gmat.index.Offset();
-  auto get_row_ptr = [&](size_t ridx) {
+  auto get_row_ptr = [&](bst_row_t ridx) {
return kFirstPage ? row_ptr[ridx] : row_ptr[ridx - base_rowid];
};
-  auto get_rid = [&](size_t ridx) {
-    return kFirstPage ? ridx : (ridx - base_rowid);
-  };
+  auto get_rid = [&](bst_row_t ridx) { return kFirstPage ? ridx : (ridx - base_rowid); };

const size_t n_features = gmat.cut.Ptrs().size() - 1;
const size_t n_columns = n_features;
3 changes: 2 additions & 1 deletion src/common/hist_util.h
@@ -362,6 +362,7 @@ bst_bin_t XGBOOST_HOST_DEV_INLINE BinarySearchBin(std::size_t begin, std::size_t
}

using GHistRow = Span<xgboost::GradientPairPrecise>;
+using ConstGHistRow = Span<xgboost::GradientPairPrecise const>;

/*!
* \brief fill a histogram by zeros
@@ -371,7 +372,7 @@ void InitilizeHistByZeroes(GHistRow hist, size_t begin, size_t end);
/*!
* \brief Increment hist as dst += add in range [begin, end)
*/
-void IncrementHist(GHistRow dst, const GHistRow add, size_t begin, size_t end);
+void IncrementHist(GHistRow dst, ConstGHistRow add, std::size_t begin, std::size_t end);

/*!
* \brief Copy hist from src to dst in range [begin, end)
9 changes: 4 additions & 5 deletions src/common/threading_utils.h
@@ -136,19 +136,18 @@ class BlockedSpace2d {
// Wrapper to implement nested parallelism with simple omp parallel for
template <typename Func>
void ParallelFor2d(const BlockedSpace2d& space, int nthreads, Func func) {
-  const size_t num_blocks_in_space = space.Size();
+  std::size_t n_blocks_in_space = space.Size();
CHECK_GE(nthreads, 1);

dmlc::OMPException exc;
#pragma omp parallel num_threads(nthreads)
{
exc.Run([&]() {
size_t tid = omp_get_thread_num();
-      size_t chunck_size =
-          num_blocks_in_space / nthreads + !!(num_blocks_in_space % nthreads);
+      size_t chunck_size = n_blocks_in_space / nthreads + !!(n_blocks_in_space % nthreads);

-      size_t begin = chunck_size * tid;
-      size_t end = std::min(begin + chunck_size, num_blocks_in_space);
+      std::size_t begin = chunck_size * tid;
+      std::size_t end = std::min(begin + chunck_size, n_blocks_in_space);
for (auto i = begin; i < end; i++) {
func(space.GetFirstDimension(i), space.GetRange(i));
}
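The chunk size is a ceiling division, n_blocks_in_space / nthreads rounded up via the !!(n % k) trick, so each thread owns one contiguous range of blocks and only the last range can run short. A standalone check of the arithmetic with illustrative values:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    int main() {
      std::size_t n_blocks_in_space = 10;
      int nthreads = 4;
      // 10 / 4 + !!(10 % 4) == 2 + 1 == 3 blocks per thread.
      std::size_t chunk = n_blocks_in_space / nthreads + !!(n_blocks_in_space % nthreads);
      for (int tid = 0; tid < nthreads; ++tid) {
        std::size_t begin = chunk * tid;
        std::size_t end = std::min(begin + chunk, n_blocks_in_space);
        // Threads 0..2 get [0,3), [3,6), [6,9); thread 3 gets the short tail [9,10).
        assert(end <= n_blocks_in_space);
      }
      return 0;
    }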
16 changes: 8 additions & 8 deletions src/tree/hist/evaluate_splits.h
@@ -65,7 +65,7 @@ class HistEvaluator {
* pseudo-category for missing value but here we just do a complete scan to avoid
* making specialized histogram bin.
*/
-  void EnumerateOneHot(common::HistogramCuts const &cut, const common::GHistRow &hist,
+  void EnumerateOneHot(common::HistogramCuts const &cut, common::ConstGHistRow hist,
bst_feature_t fidx, bst_node_t nidx,
TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator,
SplitEntry *p_best) const {
@@ -143,7 +143,7 @@
*/
template <int d_step>
void EnumeratePart(common::HistogramCuts const &cut, common::Span<size_t const> sorted_idx,
-                     common::GHistRow const &hist, bst_feature_t fidx, bst_node_t nidx,
+                     common::ConstGHistRow hist, bst_feature_t fidx, bst_node_t nidx,
TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator,
SplitEntry *p_best) {
static_assert(d_step == +1 || d_step == -1, "Invalid step.");
@@ -214,7 +214,7 @@
// Returns the sum of gradients corresponding to the data points that contains
// a non-missing value for the particular feature fid.
template <int d_step>
-  GradStats EnumerateSplit(common::HistogramCuts const &cut, const common::GHistRow &hist,
+  GradStats EnumerateSplit(common::HistogramCuts const &cut, common::ConstGHistRow hist,
bst_feature_t fidx, bst_node_t nidx,
TreeEvaluator::SplitEvaluator<TrainParam> const &evaluator,
SplitEntry *p_best) const {
@@ -454,8 +454,8 @@
right_child);
}

-  auto Evaluator() const { return tree_evaluator_.GetEvaluator(); }
-  auto const& Stats() const { return snode_; }
+  [[nodiscard]] auto Evaluator() const { return tree_evaluator_.GetEvaluator(); }
+  [[nodiscard]] auto const &Stats() const { return snode_; }

float InitRoot(GradStats const &root_sum) {
snode_.resize(1);
@@ -510,7 +510,7 @@ class HistMultiEvaluator {

template <bst_bin_t d_step>
bool EnumerateSplit(common::HistogramCuts const &cut, bst_feature_t fidx,
-                      common::Span<common::GHistRow const> hist,
+                      common::Span<common::ConstGHistRow> hist,
linalg::VectorView<GradientPairPrecise const> parent_sum, double parent_gain,
SplitEntryContainer<std::vector<GradientPairPrecise>> *p_best) const {
auto const &cut_ptr = cut.Ptrs();
@@ -651,9 +651,9 @@ class HistMultiEvaluator {
auto entry = &tloc_candidates[n_threads * nidx_in_set + tidx];
auto best = &entry->split;
auto parent_sum = stats_.Slice(entry->nid, linalg::All());
-    std::vector<common::GHistRow> node_hist;
+    std::vector<common::ConstGHistRow> node_hist;
for (auto t_hist : hist) {
-      node_hist.push_back((*t_hist)[entry->nid]);
+      node_hist.emplace_back((*t_hist)[entry->nid]);
}
auto features_set = features[nidx_in_set]->ConstHostSpan();

34 changes: 34 additions & 0 deletions src/tree/hist/param.cc
@@ -0,0 +1,34 @@
/**
* Copyright 2021-2023, XGBoost Contributors
*/
#include "param.h"

#include <string> // for string

#include "../../collective/communicator-inl.h" // for GetRank, Broadcast
#include "xgboost/json.h" // for Object, Json
#include "xgboost/tree_model.h" // for RegTree

namespace xgboost::tree {
DMLC_REGISTER_PARAMETER(HistMakerTrainParam);

void HistMakerTrainParam::CheckTreesSynchronized(RegTree const* local_tree) const {
if (!this->debug_synchronize) {
return;
}

std::string s_model;
Json model{Object{}};
int rank = collective::GetRank();
if (rank == 0) {
local_tree->SaveModel(&model);
}
Json::Dump(model, &s_model, std::ios::binary);
collective::Broadcast(&s_model, 0);

RegTree ref_tree{}; // rank 0 tree
auto j_ref_tree = Json::Load(StringView{s_model}, std::ios::binary);
ref_tree.LoadModel(j_ref_tree);
CHECK(*local_tree == ref_tree);
}
} // namespace xgboost::tree
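CheckTreesSynchronized is a broadcast-and-compare consistency check: rank 0 serializes its freshly built tree to a binary JSON string, the string is broadcast to all workers, and each worker asserts that its local tree equals the reference, so any divergence in distributed training fails fast instead of silently producing different models. A single-process sketch of the pattern (stand-in strings; the real code uses RegTree::SaveModel, Json::Dump, and collective::Broadcast):

    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
      // Each worker's locally built tree, already serialized.
      std::vector<std::string> local_models{"tree-v1", "tree-v1", "tree-v1"};
      // "Broadcast": every worker receives rank 0's serialization.
      std::string const &reference = local_models[0];
      // Each worker compares the reference with its own tree; a mismatch
      // would mean the workers' trees have diverged.
      for (std::string const &mine : local_models) {
        assert(mine == reference);
      }
      return 0;
    }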
20 changes: 20 additions & 0 deletions src/tree/hist/param.h
@@ -0,0 +1,20 @@
/**
* Copyright 2021-2023, XGBoost Contributors
*/
#pragma once
#include "xgboost/parameter.h"
#include "xgboost/tree_model.h" // for RegTree

namespace xgboost::tree {
struct HistMakerTrainParam : public XGBoostParameter<HistMakerTrainParam> {
bool debug_synchronize;
void CheckTreesSynchronized(RegTree const* local_tree) const;

// declare parameters
DMLC_DECLARE_PARAMETER(HistMakerTrainParam) {
DMLC_DECLARE_FIELD(debug_synchronize)
.set_default(false)
.describe("Check if all distributed tree are identical after tree construction.");
}
};
} // namespace xgboost::tree
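A hedged usage sketch of the new parameter: UpdateAllowUnknown comes from the dmlc parameter machinery behind XGBoostParameter (it is also how Configure applies args in updater_approx.cc below), and Args is xgboost's alias for a vector of string pairs.

    #include "param.h"         // the header above
    #include "xgboost/base.h"  // for Args

    void EnableSyncCheck() {
      xgboost::tree::HistMakerTrainParam hist_param;
      hist_param.UpdateAllowUnknown(xgboost::Args{{"debug_synchronize", "true"}});
      // hist_param.debug_synchronize is now true; the declared default is false.
    }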
29 changes: 21 additions & 8 deletions src/tree/updater_approx.cc
@@ -14,13 +14,14 @@
#include "driver.h"
#include "hist/evaluate_splits.h"
#include "hist/histogram.h"
#include "hist/param.h"
#include "hist/sampler.h" // for SampleGradient
#include "param.h"
#include "param.h" // for HistMakerTrainParam
#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/json.h"
#include "xgboost/linalg.h"
#include "xgboost/task.h" // for ObjInfo
#include "xgboost/task.h" // for ObjInfo
#include "xgboost/tree_model.h"
#include "xgboost/tree_updater.h" // for TreeUpdater

@@ -42,6 +43,7 @@ auto BatchSpec(TrainParam const &p, common::Span<float> hess) {
class GloablApproxBuilder {
protected:
TrainParam const *param_;
+  HistMakerTrainParam const *hist_param_{nullptr};
std::shared_ptr<common::ColumnSampler> col_sampler_;
HistEvaluator evaluator_;
HistogramBuilder<CPUExpandEntry> histogram_builder_;
@@ -168,10 +170,12 @@
}

public:
-  explicit GloablApproxBuilder(TrainParam const *param, MetaInfo const &info, Context const *ctx,
+  explicit GloablApproxBuilder(TrainParam const *param, HistMakerTrainParam const *hist_param,
+                               MetaInfo const &info, Context const *ctx,
std::shared_ptr<common::ColumnSampler> column_sampler,
ObjInfo const *task, common::Monitor *monitor)
: param_{param},
+        hist_param_{hist_param},
col_sampler_{std::move(column_sampler)},
evaluator_{ctx, param_, info, col_sampler_},
ctx_{ctx},
@@ -259,16 +263,23 @@ class GlobalApproxUpdater : public TreeUpdater {
std::shared_ptr<common::ColumnSampler> column_sampler_ =
std::make_shared<common::ColumnSampler>();
ObjInfo const *task_;
+  HistMakerTrainParam hist_param_;

public:
explicit GlobalApproxUpdater(Context const *ctx, ObjInfo const *task)
: TreeUpdater(ctx), task_{task} {
monitor_.Init(__func__);
}

-  void Configure(Args const &) override {}
-  void LoadConfig(Json const &) override {}
-  void SaveConfig(Json *) const override {}
+  void Configure(Args const &args) override { hist_param_.UpdateAllowUnknown(args); }
+  void LoadConfig(Json const &in) override {
+    auto const &config = get<Object const>(in);
+    FromJson(config.at("hist_train_param"), &hist_param_);
+  }
+  void SaveConfig(Json *p_out) const override {
+    auto &out = *p_out;
+    out["hist_train_param"] = ToJson(hist_param_);
+  }

void InitData(TrainParam const &param, HostDeviceVector<GradientPair> const *gpair,
linalg::Matrix<GradientPair> *sampled) {
Expand All @@ -283,8 +294,9 @@ class GlobalApproxUpdater : public TreeUpdater {
void Update(TrainParam const *param, HostDeviceVector<GradientPair> *gpair, DMatrix *m,
common::Span<HostDeviceVector<bst_node_t>> out_position,
const std::vector<RegTree *> &trees) override {
-    pimpl_ = std::make_unique<GloablApproxBuilder>(param, m->Info(), ctx_, column_sampler_, task_,
-                                                   &monitor_);
+    CHECK(hist_param_.GetInitialised());
+    pimpl_ = std::make_unique<GloablApproxBuilder>(param, &hist_param_, m->Info(), ctx_,
+                                                   column_sampler_, task_, &monitor_);

linalg::Matrix<GradientPair> h_gpair;
// Obtain the hessian values for weighted sketching
Expand All @@ -299,6 +311,7 @@ class GlobalApproxUpdater : public TreeUpdater {
std::size_t t_idx = 0;
for (auto p_tree : trees) {
this->pimpl_->UpdateTree(m, s_gpair, hess, p_tree, &out_position[t_idx]);
+      hist_param_.CheckTreesSynchronized(p_tree);
++t_idx;
}
}
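With Configure no longer a no-op, call order matters: Configure must run before Update, otherwise the new CHECK(hist_param_.GetInitialised()) fires, and SaveConfig/LoadConfig round-trip the parameter under the "hist_train_param" key so checkpoints preserve debug_synchronize. A hedged call-order sketch (the registered updater name and the exact TreeUpdater::Create signature are assumptions based on the constructor shown above):

    // Assumed name: "grow_histmaker" as this updater's registry key.
    std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", ctx, task)};
    updater->Configure(Args{});  // populates hist_param_ with defaults
    updater->Update(param, gpair, p_fmat, out_position, trees);  // safe: initialised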