Commit 057f03c
[SYCL] Initial implementation of GHistIndexMatrix (#10045)
Co-authored-by: Dmitry Razdoburdin <>
1 parent: 7cc256e
Showing 6 changed files with 510 additions and 6 deletions.
@@ -0,0 +1,177 @@
/*!
 * Copyright 2017-2024 by Contributors
 * \file gradient_index.cc
 */
#include <vector>
#include <limits>
#include <algorithm>

#include "gradient_index.h"

#include <CL/sycl.hpp>

namespace xgboost {
namespace sycl {
namespace common {

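// Find the bin for entry e: cut_ptrs[f]..cut_ptrs[f+1] delimits the cut
// values of feature f, and upper_bound returns the first cut greater than
// e.fvalue. For example, with cuts {0.5, 1.5, 2.5} for feature 0, a value
// of 1.0 lands in bin 1; values beyond the last cut are clamped into the
// last bin.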
uint32_t SearchBin(const bst_float* cut_values, const uint32_t* cut_ptrs, Entry const& e) {
  auto beg = cut_ptrs[e.index];
  auto end = cut_ptrs[e.index + 1];
  auto it = std::upper_bound(cut_values + beg, cut_values + end, e.fvalue);
  uint32_t idx = it - cut_values;
  if (idx == end) {
    idx -= 1;
  }
  return idx;
}

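// Iterative bottom-up merge sort into a scratch buffer. Recursion is not
// available in SYCL device code, so blocks are merged level by level,
// e.g. {3, 1, 2, 0} -> {1, 3, 0, 2} (block_len = 1) -> {0, 1, 2, 3}.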
template <typename BinIdxType>
void mergeSort(BinIdxType* begin, BinIdxType* end, BinIdxType* buf) {
  const size_t total_len = end - begin;
  for (size_t block_len = 1; block_len < total_len; block_len <<= 1) {
    for (size_t cur_block = 0; cur_block + block_len < total_len; cur_block += 2 * block_len) {
      size_t start = cur_block;
      size_t mid = start + block_len;
      size_t finish = mid + block_len < total_len ? mid + block_len : total_len;
      size_t left_pos = start;
      size_t right_pos = mid;
      size_t pos = start;
      while (left_pos < mid || right_pos < finish) {
        if (left_pos < mid && (right_pos == finish || begin[left_pos] < begin[right_pos])) {
          buf[pos++] = begin[left_pos++];
        } else {
          buf[pos++] = begin[right_pos++];
        }
      }
      for (size_t i = start; i < finish; i++) begin[i] = buf[i];
    }
  }
}

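// Quantize every entry of dmat into index_data and accumulate per-bin hit
// counts. In the dense case the per-feature offset is subtracted so bin
// indices fit the narrow type chosen by ResizeIndex; in the sparse case
// (offsets == nullptr) each row is sorted and padded with nbins up to
// row_stride.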
template <typename BinIdxType>
void GHistIndexMatrix::SetIndexData(::sycl::queue qu,
                                    BinIdxType* index_data,
                                    const DeviceMatrix &dmat,
                                    size_t nbins,
                                    size_t row_stride,
                                    uint32_t* offsets) {
  if (nbins == 0) return;
  const xgboost::Entry *data_ptr = dmat.data.DataConst();
  const bst_row_t *offset_vec = dmat.row_ptr.DataConst();
  const size_t num_rows = dmat.row_ptr.Size() - 1;
  const bst_float* cut_values = cut_device.Values().DataConst();
  const uint32_t* cut_ptrs = cut_device.Ptrs().DataConst();
  size_t* hit_count_ptr = hit_count_buff.Data();

  // Sparse case only
  if (!offsets) {
    // sort_buff has type uint8_t
    sort_buff.Resize(&qu, num_rows * row_stride * sizeof(BinIdxType));
  }
  BinIdxType* sort_data = reinterpret_cast<BinIdxType*>(sort_buff.Data());

  auto event = qu.submit([&](::sycl::handler& cgh) {
    cgh.parallel_for<>(::sycl::range<1>(num_rows), [=](::sycl::item<1> pid) {
      const size_t i = pid.get_id(0);
      const size_t ibegin = offset_vec[i];
      const size_t iend = offset_vec[i + 1];
      const size_t size = iend - ibegin;
      const size_t start = i * row_stride;
      for (bst_uint j = 0; j < size; ++j) {
        uint32_t idx = SearchBin(cut_values, cut_ptrs, data_ptr[ibegin + j]);
        index_data[start + j] = offsets ? idx - offsets[j] : idx;
        AtomicRef<size_t> hit_count_ref(hit_count_ptr[idx]);
        hit_count_ref.fetch_add(1);
      }
      if (!offsets) {
        // Sparse case only
        mergeSort<BinIdxType>(index_data + start, index_data + start + size, sort_data + start);
        for (bst_uint j = size; j < row_stride; ++j) {
          index_data[start + j] = nbins;
        }
      }
    });
  });
  qu.memcpy(hit_count.data(), hit_count_ptr, nbins * sizeof(size_t), event);
  qu.wait();
}

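// Pick the narrowest element type that can hold a bin index. Dense matrices
// store indices relative to the per-feature offset, so max_num_bins bounds
// the stored value; sparse matrices store global bin indices and always use
// 32 bits.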
void GHistIndexMatrix::ResizeIndex(size_t n_index, bool isDense) {
  if ((max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint8_t>::max())) && isDense) {
    index.SetBinTypeSize(BinTypeSize::kUint8BinsTypeSize);
    index.Resize((sizeof(uint8_t)) * n_index);
  } else if ((max_num_bins - 1 > static_cast<int>(std::numeric_limits<uint8_t>::max()) &&
              max_num_bins - 1 <= static_cast<int>(std::numeric_limits<uint16_t>::max())) && isDense) {
    index.SetBinTypeSize(BinTypeSize::kUint16BinsTypeSize);
    index.Resize((sizeof(uint16_t)) * n_index);
  } else {
    index.SetBinTypeSize(BinTypeSize::kUint32BinsTypeSize);
    index.Resize((sizeof(uint32_t)) * n_index);
  }
}

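// Build the quantized matrix: sketch the cuts on the host, upload them to
// the device, compute row_stride, and quantize every entry into index.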
void GHistIndexMatrix::Init(::sycl::queue qu,
                            Context const * ctx,
                            const DeviceMatrix& p_fmat_device,
                            int max_bins) {
  nfeatures = p_fmat_device.p_mat->Info().num_col_;

  cut = xgboost::common::SketchOnDMatrix(ctx, p_fmat_device.p_mat, max_bins);
  cut_device.Init(qu, cut);

  max_num_bins = max_bins;
  const uint32_t nbins = cut.Ptrs().back();
  this->nbins = nbins;
  hit_count.resize(nbins, 0);
  hit_count_buff.Resize(&qu, nbins, 0);

  this->p_fmat = p_fmat_device.p_mat;
  const bool isDense = p_fmat_device.p_mat->IsDense();
  this->isDense_ = isDense;

  index.setQueue(qu);

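  // row_stride is the length of the longest row; every row is stored padded
  // to this width so the index can be addressed as a dense 2-D array.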
  row_stride = 0;
  for (const auto& batch : p_fmat_device.p_mat->GetBatches<SparsePage>()) {
    const auto& row_offset = batch.offset.ConstHostVector();
    for (auto i = 1ull; i < row_offset.size(); i++) {
      row_stride = std::max(row_stride, static_cast<size_t>(row_offset[i] - row_offset[i - 1]));
    }
  }

  const size_t n_offsets = cut_device.Ptrs().Size() - 1;
  const size_t n_rows = p_fmat_device.row_ptr.Size() - 1;
  const size_t n_index = n_rows * row_stride;
  ResizeIndex(n_index, isDense);

  CHECK_GT(cut_device.Values().Size(), 0U);

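  // Dense case: copy the per-feature cut offsets to the device so the kernel
  // can store bin indices relative to their feature.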
  uint32_t* offsets = nullptr;
  if (isDense) {
    index.ResizeOffset(n_offsets);
    offsets = index.Offset();
    qu.memcpy(offsets, cut_device.Ptrs().DataConst(),
              sizeof(uint32_t) * n_offsets).wait_and_throw();
  }

  if (isDense) {
    BinTypeSize current_bin_size = index.GetBinTypeSize();
    if (current_bin_size == BinTypeSize::kUint8BinsTypeSize) {
      SetIndexData(qu, index.data<uint8_t>(), p_fmat_device, nbins, row_stride, offsets);
    } else if (current_bin_size == BinTypeSize::kUint16BinsTypeSize) {
      SetIndexData(qu, index.data<uint16_t>(), p_fmat_device, nbins, row_stride, offsets);
    } else {
      CHECK_EQ(current_bin_size, BinTypeSize::kUint32BinsTypeSize);
      SetIndexData(qu, index.data<uint32_t>(), p_fmat_device, nbins, row_stride, offsets);
    }
    /* For a sparse DMatrix we have to store the index of the feature for each
       bin in the index field to choose the right offset. So offsets is
       nullptr and the index is not reduced. */
  } else {
    SetIndexData(qu, index.data<uint32_t>(), p_fmat_device, nbins, row_stride, offsets);
  }
}

}  // namespace common
}  // namespace sycl
}  // namespace xgboost
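
A minimal usage sketch of the new class (the queue selection and the driver function are assumptions for illustration; only GHistIndexMatrix::Init and its signature come from the diff above):

#include <CL/sycl.hpp>
#include "gradient_index.h"

// Hypothetical driver code, not part of this commit.
void BuildGHistIndex(xgboost::Context const* ctx,
                     const xgboost::sycl::common::DeviceMatrix& dmat_device) {
  ::sycl::queue qu;  // default device; the selector choice is an assumption
  xgboost::sycl::common::GHistIndexMatrix gmat;
  // Quantizes dmat_device into at most 256 bins per feature.
  gmat.Init(qu, ctx, dmat_device, /*max_bins=*/256);
}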