diff --git a/include/xgboost/collective/socket.h b/include/xgboost/collective/socket.h index c5dd977f6255..84885cf317f5 100644 --- a/include/xgboost/collective/socket.h +++ b/include/xgboost/collective/socket.h @@ -548,13 +548,10 @@ class TCPSocket { [[nodiscard]] HandleT const &Handle() const { return handle_; } /** * @brief Listen to incoming requests. Should be called after bind. + * + * Both the default and minimum backlog is set to 256. */ - [[nodiscard]] Result Listen(std::int32_t backlog = 16) { - if (listen(handle_, backlog) != 0) { - return system::FailWithCode("Failed to listen."); - } - return Success(); - } + [[nodiscard]] Result Listen(std::int32_t backlog = 256); /** * @brief Bind socket to INADDR_ANY, return the port selected by the OS. */ diff --git a/src/collective/socket.cc b/src/collective/socket.cc index 5145c13a1366..dd6c05e6f964 100644 --- a/src/collective/socket.cc +++ b/src/collective/socket.cc @@ -3,6 +3,7 @@ */ #include "xgboost/collective/socket.h" +#include // for max #include // for array #include // std::size_t #include // std::int32_t @@ -58,6 +59,14 @@ SockAddrV4 SockAddrV4::InaddrAny() { return MakeSockAddress("0.0.0.0", 0).V4(); SockAddrV6 SockAddrV6::Loopback() { return MakeSockAddress("::1", 0).V6(); } SockAddrV6 SockAddrV6::InaddrAny() { return MakeSockAddress("::", 0).V6(); } +[[nodiscard]] Result TCPSocket::Listen(std::int32_t backlog) { + backlog = std::max(backlog, 256); + if (listen(this->handle_, backlog) != 0) { + return system::FailWithCode("Failed to listen."); + } + return Success(); +} + std::size_t TCPSocket::Send(StringView str) { CHECK(!this->IsClosed()); CHECK_LT(str.size(), std::numeric_limits::max()); diff --git a/src/collective/tracker.cc b/src/collective/tracker.cc index c8776f294690..9bffbc5590e5 100644 --- a/src/collective/tracker.cc +++ b/src/collective/tracker.cc @@ -120,7 +120,8 @@ RabitTracker::RabitTracker(Json const& config) : Tracker{config} { listener_ = TCPSocket::Create(addr.IsV4() ? SockDomain::kV4 : SockDomain::kV6); return listener_.Bind(host_, &this->port_); } << [&] { - return listener_.Listen(); + CHECK_GT(this->n_workers_, 0); + return listener_.Listen(this->n_workers_); }; SafeColl(rc); } diff --git a/src/common/device_helpers.cuh b/src/common/device_helpers.cuh index f4fce42f84f8..03b8f2c0a40c 100644 --- a/src/common/device_helpers.cuh +++ b/src/common/device_helpers.cuh @@ -224,13 +224,6 @@ __global__ void LaunchNKernel(size_t begin, size_t end, L lambda) { lambda(i); } } -template -__global__ void LaunchNKernel(int device_idx, size_t begin, size_t end, - L lambda) { - for (auto i : GridStrideRange(begin, end)) { - lambda(i, device_idx); - } -} /* \brief A wrapper around kernel launching syntax, used to guard against empty input. * diff --git a/src/tree/gpu_hist/row_partitioner.cuh b/src/tree/gpu_hist/row_partitioner.cuh index fde6c4dd0fa9..cf1f0e4edf6e 100644 --- a/src/tree/gpu_hist/row_partitioner.cuh +++ b/src/tree/gpu_hist/row_partitioner.cuh @@ -146,10 +146,11 @@ void SortPositionBatch(common::Span> d_batch_info, // Value found by experimentation const int kItemsThread = 12; - const int grid_size = xgboost::common::DivRoundUp(total_rows, kBlockSize * kItemsThread); - SortPositionCopyKernel - <<>>(batch_info_itr, ridx, ridx_tmp, total_rows); + std::uint32_t const kGridSize = + xgboost::common::DivRoundUp(total_rows, kBlockSize * kItemsThread); + dh::LaunchKernel{kGridSize, kBlockSize, 0}(SortPositionCopyKernel, + batch_info_itr, ridx, ridx_tmp, total_rows); } struct NodePositionInfo { @@ -328,11 +329,13 @@ class RowPartitioner { sizeof(NodePositionInfo) * ridx_segments_.size(), cudaMemcpyDefault)); - constexpr int kBlockSize = 512; + constexpr std::uint32_t kBlockSize = 512; const int kItemsThread = 8; - const int grid_size = xgboost::common::DivRoundUp(ridx_.size(), kBlockSize * kItemsThread); + const std::uint32_t grid_size = + xgboost::common::DivRoundUp(ridx_.size(), kBlockSize * kItemsThread); common::Span d_ridx(ridx_.data().get(), ridx_.size()); - FinalisePositionKernel<<>>( + dh::LaunchKernel{grid_size, kBlockSize}( + FinalisePositionKernel, dh::ToSpan(d_node_info_storage), d_ridx, d_out_position, op); } }; diff --git a/tests/cpp/tree/gpu_hist/test_row_partitioner.cu b/tests/cpp/tree/gpu_hist/test_row_partitioner.cu index 14ea6fd70a4e..54bf17247432 100644 --- a/tests/cpp/tree/gpu_hist/test_row_partitioner.cu +++ b/tests/cpp/tree/gpu_hist/test_row_partitioner.cu @@ -6,15 +6,12 @@ #include #include -#include #include #include "../../../../src/tree/gpu_hist/row_partitioner.cuh" #include "../../helpers.h" #include "xgboost/base.h" -#include "xgboost/context.h" -#include "xgboost/task.h" -#include "xgboost/tree_model.h" +#include "../../helpers.h" // for RandomDataGenerator namespace xgboost::tree { void TestUpdatePositionBatch() { @@ -55,7 +52,9 @@ void TestSortPositionBatch(const std::vector& ridx_in, const std::vector ridx_tmp(ridx_in.size()); thrust::device_vector counts(segments.size()); - auto op = [=] __device__(auto ridx, int split_index, int data) { return ridx % 2 == 0; }; + auto op = [=] __device__(auto ridx, int split_index, int data) { + return ridx % 2 == 0; + }; std::vector op_data(segments.size()); std::vector> h_batch_info(segments.size()); dh::TemporaryArray> d_batch_info(segments.size()); @@ -73,7 +72,9 @@ void TestSortPositionBatch(const std::vector& ridx_in, const std::vector& ridx_in, const std::vector