Fix column split race condition. #10572

Merged · 5 commits · Jul 11, 2024
48 changes: 41 additions & 7 deletions src/tree/common_row_partitioner.h
@@ -36,10 +36,11 @@ class ColumnSplitHelper {
common::PartitionBuilder<kPartitionBlockSize>* partition_builder,
common::RowSetCollection* row_set_collection)
: partition_builder_{partition_builder}, row_set_collection_{row_set_collection} {
decision_storage_.resize(num_row);
decision_bits_ = BitVector(common::Span<BitVector::value_type>(decision_storage_));
missing_storage_.resize(num_row);
missing_bits_ = BitVector(common::Span<BitVector::value_type>(missing_storage_));
auto n_bytes = BitVector::ComputeStorageSize(num_row);
decision_storage_.resize(n_bytes);
decision_bits_ = BitVector{common::Span<BitVector::value_type>{decision_storage_}};
missing_storage_.resize(n_bytes);
missing_bits_ = BitVector{common::Span<BitVector::value_type>{missing_storage_}};
}

template <typename BinIdxType, bool any_missing, bool any_cat, typename ExpandEntry>
@@ -51,14 +52,43 @@ class ColumnSplitHelper {
// we first collect all the decisions and whether the feature is missing into bit vectors.
std::fill(decision_storage_.begin(), decision_storage_.end(), 0);
std::fill(missing_storage_.begin(), missing_storage_.end(), 0);
common::ParallelFor2d(space, n_threads, [&](size_t node_in_set, common::Range1d r) {
const int32_t nid = nodes[node_in_set].nid;

this->tloc_decision_.resize(decision_storage_.size() * n_threads);
this->tloc_missing_.resize(decision_storage_.size() * n_threads);
std::fill_n(this->tloc_decision_.data(), this->tloc_decision_.size(), 0);
std::fill_n(this->tloc_missing_.data(), this->tloc_missing_.size(), 0);

// Make thread-local storage.
using T = decltype(decision_storage_)::value_type;
auto make_tloc = [&](std::vector<T>& storage, std::int32_t tidx) {
auto span = common::Span<T>{storage};
auto n = decision_storage_.size();
auto bitvec = BitVector{span.subspan(n * tidx, n)};
return bitvec;
};

common::ParallelFor2d(space, n_threads, [&](std::size_t node_in_set, common::Range1d r) {
bst_node_t const nid = nodes[node_in_set].nid;
auto tidx = omp_get_thread_num();
auto decision = make_tloc(this->tloc_decision_, tidx);
auto missing = make_tloc(this->tloc_missing_, tidx);
bst_bin_t split_cond = column_matrix.IsInitialized() ? split_conditions[node_in_set] : 0;
partition_builder_->MaskRows<BinIdxType, any_missing, any_cat>(
node_in_set, nodes, r, split_cond, gmat, column_matrix, *p_tree,
(*row_set_collection_)[nid].begin(), &decision_bits_, &missing_bits_);
(*row_set_collection_)[nid].begin(), &decision, &missing);
});

// Reduce thread local
auto decision = make_tloc(this->tloc_decision_, 0);
auto missing = make_tloc(this->tloc_missing_, 0);
for (std::int32_t tidx = 1; tidx < n_threads; ++tidx) {
decision |= make_tloc(this->tloc_decision_, tidx);
missing |= make_tloc(this->tloc_missing_, tidx);
}
CHECK_EQ(decision_storage_.size(), decision.NumValues());
std::copy_n(decision.Data(), decision_storage_.size(), decision_storage_.data());
std::copy_n(missing.Data(), missing_storage_.size(), missing_storage_.data());

// Then aggregate the bit vectors across all the workers.
auto rc = collective::Success() << [&] {
return collective::Allreduce(ctx, &decision_storage_, collective::Op::kBitwiseOR);
@@ -85,6 +115,10 @@ class ColumnSplitHelper {
BitVector decision_bits_{};
std::vector<BitVector::value_type> missing_storage_{};
BitVector missing_bits_{};

std::vector<BitVector::value_type> tloc_decision_;
std::vector<BitVector::value_type> tloc_missing_;

common::PartitionBuilder<kPartitionBlockSize>* partition_builder_;
common::RowSetCollection* row_set_collection_;
};
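For context on the core change above: the previous code let every OpenMP thread flip bits directly in the shared `decision_bits_`/`missing_bits_` vectors. Two threads can land in the same storage word, and the non-atomic read-modify-write behind `|=` then silently drops bits. The fix also sizes the storage with `BitVector::ComputeStorageSize()`, gives each thread its own slice (`tloc_decision_`/`tloc_missing_`), and OR-reduces the slices into the shared buffers before the collective allreduce. The sketch below reproduces that pattern with plain `std::vector<std::uint32_t>` and a made-up "even rows go left" decision rule; it is not the actual `BitVector`/`PartitionBuilder` API, only an illustration of the thread-local fill plus bitwise-OR reduction (compile with `-fopenmp`).

```cpp
#include <omp.h>

#include <cstdint>
#include <iostream>
#include <vector>

using Word = std::uint32_t;
constexpr std::size_t kWordBits = 8 * sizeof(Word);

// Number of words needed to hold n_rows bits (what ComputeStorageSize is for).
std::size_t StorageSize(std::size_t n_rows) { return (n_rows + kWordBits - 1) / kWordBits; }

// Non-atomic bit set; safe only because each thread writes its own slice.
void SetBit(Word* slice, std::size_t i) { slice[i / kWordBits] |= Word{1} << (i % kWordBits); }

int main() {
  std::size_t const n_rows = 1000;
  auto const n_words = StorageSize(n_rows);
  auto const n_threads = static_cast<std::size_t>(omp_get_max_threads());

  // One zero-initialised slice of n_words per thread.
  std::vector<Word> tloc_decision(n_words * n_threads, 0);

  // Each thread evaluates its rows and sets bits only in its own slice,
  // so there is no shared read-modify-write and hence no race.
#pragma omp parallel for
  for (std::int64_t i = 0; i < static_cast<std::int64_t>(n_rows); ++i) {
    auto tidx = static_cast<std::size_t>(omp_get_thread_num());
    bool go_left = (i % 2 == 0);  // stand-in for the real split decision
    if (go_left) {
      SetBit(tloc_decision.data() + tidx * n_words, static_cast<std::size_t>(i));
    }
  }

  // Sequential bitwise-OR reduction into one shared buffer. In the PR this
  // buffer is what later goes through collective::Allreduce with kBitwiseOR.
  std::vector<Word> decision(n_words, 0);
  for (std::size_t tidx = 0; tidx < n_threads; ++tidx) {
    for (std::size_t w = 0; w < n_words; ++w) {
      decision[w] |= tloc_decision[tidx * n_words + w];
    }
  }

  // Sanity check: exactly the even rows should be set.
  std::size_t n_set = 0;
  for (std::size_t i = 0; i < n_rows; ++i) {
    n_set += (decision[i / kWordBits] >> (i % kWordBits)) & 1u;
  }
  std::cout << n_set << " of " << n_rows << " rows go left\n";  // expect 500
  return 0;
}
```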
23 changes: 23 additions & 0 deletions tests/cpp/tree/test_approx.cc
@@ -6,6 +6,7 @@
#include "../../../src/tree/common_row_partitioner.h"
#include "../collective/test_worker.h" // for TestDistributedGlobal
#include "../helpers.h"
#include "test_column_split.h" // for TestColumnSplit
#include "test_partitioner.h"

namespace xgboost::tree {
@@ -154,4 +155,26 @@ TEST(Approx, PartitionerColSplit) {
mid_partitioner);
});
}

namespace {
class TestApproxColSplit : public ::testing::TestWithParam<std::tuple<bool, float>> {
public:
void Run() {
auto [categorical, sparsity] = GetParam();
TestColumnSplit(1u, categorical, "grow_histmaker", sparsity);
}
};
} // namespace

TEST_P(TestApproxColSplit, Basic) { this->Run(); }

INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestApproxColSplit, ::testing::ValuesIn([]() {
std::vector<std::tuple<bool, float>> params;
for (auto categorical : {true, false}) {
for (auto sparsity : {0.0f, 0.6f}) {
params.emplace_back(categorical, sparsity);
}
}
return params;
}()));
} // namespace xgboost::tree
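The new tests here and in test_quantile_hist.cc share the same shape: a `TestWithParam` fixture plus an `INSTANTIATE_TEST_SUITE_P` whose parameter grid is produced by an immediately invoked lambda passed to `::testing::ValuesIn`. A stripped-down, stand-alone version of that pattern (outside XGBoost, with placeholder fixture name and checks) looks like this; link against `gtest_main` to run it.

```cpp
#include <gtest/gtest.h>

#include <tuple>
#include <vector>

namespace {
// Fixture parameterized over (categorical, sparsity), mirroring TestApproxColSplit.
class GridTest : public ::testing::TestWithParam<std::tuple<bool, float>> {
 public:
  void Run() {
    auto [categorical, sparsity] = GetParam();
    // A real test would train on data generated with these settings; here we
    // only assert that the parameters are sensible.
    EXPECT_GE(sparsity, 0.0f);
    EXPECT_LE(sparsity, 1.0f);
    (void)categorical;
  }
};
}  // namespace

TEST_P(GridTest, Basic) { this->Run(); }

// The lambda builds the full cross product once and is invoked immediately.
INSTANTIATE_TEST_SUITE_P(Example, GridTest, ::testing::ValuesIn([]() {
                           std::vector<std::tuple<bool, float>> params;
                           for (auto categorical : {true, false}) {
                             for (auto sparsity : {0.0f, 0.6f}) {
                               params.emplace_back(categorical, sparsity);
                             }
                           }
                           return params;
                         }()));
```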
79 changes: 79 additions & 0 deletions tests/cpp/tree/test_column_split.h
@@ -0,0 +1,79 @@
/**
* Copyright 2023-2024, XGBoost Contributors
*/
#pragma once

#include <xgboost/data.h> // for FeatureType, DMatrix
#include <xgboost/tree_model.h> // for RegTree
#include <xgboost/tree_updater.h> // for TreeUpdater

#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <vector> // for vector

#include "../../../src/tree/param.h" // for TrainParam
#include "../collective/test_worker.h" // for TestDistributedGlobal
#include "../helpers.h" // for RandomDataGenerator

namespace xgboost::tree {
inline std::shared_ptr<DMatrix> GenerateCatDMatrix(std::size_t rows, std::size_t cols,
float sparsity, bool categorical) {
if (categorical) {
std::vector<FeatureType> ft(cols);
for (size_t i = 0; i < ft.size(); ++i) {
ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
}
return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix();
} else {
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
}
}

inline void TestColumnSplit(bst_target_t n_targets, bool categorical, std::string name,
float sparsity) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;

RegTree expected_tree{n_targets, static_cast<bst_feature_t>(kCols)};
ObjInfo task{ObjInfo::kRegression};
Context ctx;
{
auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical);
auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, &gpair, p_dmat.get(), position, {&expected_tree});
}

auto verify = [&] {
Context ctx;
auto p_dmat = GenerateCatDMatrix(kRows, kCols, sparsity, categorical);
auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);

ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create(name, &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);

std::unique_ptr<DMatrix> sliced{
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};

RegTree tree{n_targets, static_cast<bst_feature_t>(kCols)};
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, &gpair, sliced.get(), position, {&tree});

Json json{Object{}};
tree.SaveModel(&json);
Json expected_json{Object{}};
expected_tree.SaveModel(&expected_json);
ASSERT_EQ(json, expected_json);
};

auto constexpr kWorldSize = 2;
collective::TestDistributedGlobal(kWorldSize, [&] { verify(); });
}
} // namespace xgboost::tree
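What the helper verifies, in one sentence: a tree trained on the full feature matrix must serialize identically to a tree trained cooperatively by workers that each hold every row but only a slice of the columns. The toy code below only illustrates that column-slicing layout with a dense row-major matrix; `ColumnRange` and `SliceCols` are hypothetical helpers for illustration and not `DMatrix::SliceCol`.

```cpp
#include <algorithm>  // for std::min
#include <cstddef>
#include <iostream>
#include <utility>    // for std::pair
#include <vector>

struct DenseMatrix {
  std::size_t rows, cols;
  std::vector<float> data;  // row-major
  float At(std::size_t r, std::size_t c) const { return data[r * cols + c]; }
};

// Columns [begin, end) owned by worker `rank` out of `world` workers.
std::pair<std::size_t, std::size_t> ColumnRange(std::size_t cols, std::size_t world,
                                                std::size_t rank) {
  auto per_worker = (cols + world - 1) / world;  // ceil division
  auto begin = std::min(cols, per_worker * rank);
  auto end = std::min(cols, begin + per_worker);
  return {begin, end};
}

// Every worker keeps all rows and copies only its own columns.
DenseMatrix SliceCols(DenseMatrix const& m, std::size_t world, std::size_t rank) {
  auto [begin, end] = ColumnRange(m.cols, world, rank);
  DenseMatrix out{m.rows, end - begin, {}};
  out.data.reserve(m.rows * out.cols);
  for (std::size_t r = 0; r < m.rows; ++r) {
    for (std::size_t c = begin; c < end; ++c) {
      out.data.push_back(m.At(r, c));
    }
  }
  return out;
}

int main() {
  DenseMatrix m{4, 6, std::vector<float>(4 * 6)};
  for (std::size_t i = 0; i < m.data.size(); ++i) {
    m.data[i] = static_cast<float>(i);
  }
  for (std::size_t rank = 0; rank < 2; ++rank) {
    auto s = SliceCols(m, /*world=*/2, rank);
    std::cout << "worker " << rank << " holds a " << s.rows << "x" << s.cols << " slice\n";
  }
  return 0;
}
```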
79 changes: 4 additions & 75 deletions tests/cpp/tree/test_histmaker.cc
@@ -1,32 +1,19 @@
/**
* Copyright 2019-2023 by XGBoost Contributors
* Copyright 2019-2024, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>

#include "../../../src/tree/param.h" // for TrainParam
#include "../collective/test_worker.h" // for TestDistributedGlobal
#include "../../../src/tree/param.h" // for TrainParam
#include "../helpers.h"
#include "test_column_split.h" // for GenerateCatDMatrix

namespace xgboost::tree {
std::shared_ptr<DMatrix> GenerateDMatrix(std::size_t rows, std::size_t cols,
bool categorical = false) {
if (categorical) {
std::vector<FeatureType> ft(cols);
for (size_t i = 0; i < ft.size(); ++i) {
ft[i] = (i % 3 == 0) ? FeatureType::kNumerical : FeatureType::kCategorical;
}
return RandomDataGenerator(rows, cols, 0.6f).Seed(3).Type(ft).MaxCategory(17).GenerateDMatrix();
} else {
return RandomDataGenerator{rows, cols, 0.6f}.Seed(3).GenerateDMatrix();
}
}

TEST(GrowHistMaker, InteractionConstraint) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;
auto p_dmat = GenerateDMatrix(kRows, kCols);
auto p_dmat = GenerateCatDMatrix(kRows, kCols, 0.0, false);
Context ctx;

linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
@@ -69,62 +56,4 @@ TEST(GrowHistMaker, InteractionConstraint) {
ASSERT_NE(tree[tree[0].RightChild()].SplitIndex(), 0);
}
}

namespace {
void VerifyColumnSplit(int32_t rows, bst_feature_t cols, bool categorical,
RegTree const& expected_tree) {
Context ctx;
auto p_dmat = GenerateDMatrix(rows, cols, categorical);
linalg::Matrix<GradientPair> gpair({rows}, ctx.Device());
gpair.Data()->Copy(GenerateRandomGradients(rows));


ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);

std::unique_ptr<DMatrix> sliced{
p_dmat->SliceCol(collective::GetWorldSize(), collective::GetRank())};

RegTree tree{1u, cols};
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, &gpair, sliced.get(), position, {&tree});

Json json{Object{}};
tree.SaveModel(&json);
Json expected_json{Object{}};
expected_tree.SaveModel(&expected_json);
ASSERT_EQ(json, expected_json);
}

void TestColumnSplit(bool categorical) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;

RegTree expected_tree{1u, kCols};
ObjInfo task{ObjInfo::kRegression};
{
Context ctx;
auto p_dmat = GenerateDMatrix(kRows, kCols, categorical);
linalg::Matrix<GradientPair> gpair({kRows}, ctx.Device());
gpair.Data()->Copy(GenerateRandomGradients(kRows));
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, &gpair, p_dmat.get(), position, {&expected_tree});
}

auto constexpr kWorldSize = 2;
collective::TestDistributedGlobal(
kWorldSize, [&] { VerifyColumnSplit(kRows, kCols, categorical, expected_tree); });
}
} // anonymous namespace

TEST(GrowHistMaker, ColumnSplitNumerical) { TestColumnSplit(false); }

TEST(GrowHistMaker, ColumnSplitCategorical) { TestColumnSplit(true); }
} // namespace xgboost::tree
71 changes: 20 additions & 51 deletions tests/cpp/tree/test_quantile_hist.cc
@@ -11,9 +11,9 @@

#include "../../../src/tree/common_row_partitioner.h"
#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
#include "../../../src/tree/param.h"
#include "../collective/test_worker.h" // for TestDistributedGlobal
#include "../helpers.h"
#include "test_column_split.h" // for TestColumnSplit
#include "test_partitioner.h"
#include "xgboost/data.h"

@@ -208,57 +208,26 @@ TEST(QuantileHist, PartitionerColSplit)
TEST(QuantileHist, MultiPartitionerColSplit) { TestColumnSplitPartitioner<MultiExpandEntry>(3); }

namespace {
void VerifyColumnSplit(Context const* ctx, bst_idx_t rows, bst_feature_t cols, bst_target_t n_targets,
RegTree const& expected_tree) {
auto Xy = RandomDataGenerator{rows, cols, 0}.GenerateDMatrix(true);
linalg::Matrix<GradientPair> gpair = GenerateRandomGradients(ctx, rows, n_targets);

ObjInfo task{ObjInfo::kRegression};
std::unique_ptr<TreeUpdater> updater{TreeUpdater::Create("grow_quantile_histmaker", ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);

std::unique_ptr<DMatrix> sliced{Xy->SliceCol(collective::GetWorldSize(), collective::GetRank())};

RegTree tree{n_targets, cols};
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, &gpair, sliced.get(), position, {&tree});

Json json{Object{}};
tree.SaveModel(&json);
Json expected_json{Object{}};
expected_tree.SaveModel(&expected_json);
ASSERT_EQ(json, expected_json);
}

void TestColumnSplit(bst_target_t n_targets) {
auto constexpr kRows = 32;
auto constexpr kCols = 16;

RegTree expected_tree{n_targets, kCols};
ObjInfo task{ObjInfo::kRegression};
Context ctx;
{
auto Xy = RandomDataGenerator{kRows, kCols, 0}.GenerateDMatrix(true);
auto gpair = GenerateRandomGradients(&ctx, kRows, n_targets);
std::unique_ptr<TreeUpdater> updater{
TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)};
std::vector<HostDeviceVector<bst_node_t>> position(1);
TrainParam param;
param.Init(Args{});
updater->Configure(Args{});
updater->Update(&param, &gpair, Xy.get(), position, {&expected_tree});
class TestHistColSplit : public ::testing::TestWithParam<std::tuple<bst_target_t, bool, float>> {
public:
void Run() {
auto [n_targets, categorical, sparsity] = GetParam();
TestColumnSplit(n_targets, categorical, "grow_quantile_histmaker", sparsity);
}

auto constexpr kWorldSize = 2;
collective::TestDistributedGlobal(kWorldSize, [&] {
VerifyColumnSplit(&ctx, kRows, kCols, n_targets, std::cref(expected_tree));
});
}
};
} // anonymous namespace

TEST(QuantileHist, ColumnSplit) { TestColumnSplit(1); }

TEST(QuantileHist, ColumnSplitMultiTarget) { TestColumnSplit(3); }
TEST_P(TestHistColSplit, Basic) { this->Run(); }

INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestHistColSplit, ::testing::ValuesIn([]() {
std::vector<std::tuple<bst_target_t, bool, float>> params;
for (auto categorical : {true, false}) {
for (auto sparsity : {0.0f, 0.6f}) {
for (bst_target_t n_targets : {1u, 3u}) {
params.emplace_back(n_targets, categorical, sparsity);
}
}
}
return params;
}()));
} // namespace xgboost::tree