dmlc · trivialfis · Mar 30, 2023 · Mar 27, 2023 · Mar 27, 2023
diff --git a/src/common/ranking_utils.h b/src/common/ranking_utils.h
@@ -123,7 +123,7 @@ struct LambdaRankParam : public XGBoostParameter<LambdaRankParam> {
 
   DMLC_DECLARE_PARAMETER(LambdaRankParam) {
     DMLC_DECLARE_FIELD(lambdarank_pair_method)
-        .set_default(PairMethod::kMean)
+        .set_default(PairMethod::kTopK)
         .add_enum("mean", PairMethod::kMean)
         .add_enum("topk", PairMethod::kTopK)
         .describe("Method for constructing pairs.");

diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc
@@ -112,7 +112,6 @@ class PerGroupWeightPolicy {
     return info.GetWeight(group_id);
   }
 };
-
 }  // anonymous namespace
 
 namespace xgboost::metric {

diff --git a/src/objective/init_estimation.cc b/src/objective/init_estimation.cc
@@ -14,8 +14,7 @@
 #include "xgboost/linalg.h"              // Tensor,Vector
 #include "xgboost/task.h"                // ObjInfo
 
-namespace xgboost {
-namespace obj {
+namespace xgboost::obj {
 void FitIntercept::InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const {
   if (this->Task().task == ObjInfo::kRegression) {
     CheckInitInputs(info);
@@ -31,14 +30,13 @@ void FitIntercept::InitEstimation(MetaInfo const& info, linalg::Vector<float>* b
       ObjFunction::Create(get<String const>(config["name"]), this->ctx_)};
   new_obj->LoadConfig(config);
   new_obj->GetGradient(dummy_predt, info, 0, &gpair);
+
   bst_target_t n_targets = this->Targets(info);
   linalg::Vector<float> leaf_weight;
   tree::FitStump(this->ctx_, info, gpair, n_targets, &leaf_weight);
-
   // workaround, we don't support multi-target due to binary model serialization for
   // base margin.
   common::Mean(this->ctx_, leaf_weight, base_score);
   this->PredTransform(base_score->Data());
 }
-}  // namespace obj
-}  // namespace xgboost
+}  // namespace xgboost::obj
diff --git a/src/objective/init_estimation.h b/src/objective/init_estimation.h
@@ -7,8 +7,7 @@
 #include "xgboost/linalg.h"     // Tensor
 #include "xgboost/objective.h"  // ObjFunction
 
-namespace xgboost {
-namespace obj {
+namespace xgboost::obj {
 class FitIntercept : public ObjFunction {
   void InitEstimation(MetaInfo const& info, linalg::Vector<float>* base_score) const override;
 };
@@ -20,6 +19,5 @@ inline void CheckInitInputs(MetaInfo const& info) {
         << "Number of weights should be equal to number of data points.";
   }
 }
-}  // namespace obj
-}  // namespace xgboost
+}  // namespace xgboost::obj
 #endif  // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_
diff --git a/src/objective/lambdarank_obj.cu b/src/objective/lambdarank_obj.cu
@@ -0,0 +1,62 @@
+/**
+ * Copyright 2015-2023 by XGBoost contributors
+ *
+ * \brief CUDA implementation of lambdarank.
+ */
+#include <thrust/fill.h>                        // for fill_n
+#include <thrust/for_each.h>                    // for for_each_n
+#include <thrust/iterator/counting_iterator.h>  // for make_counting_iterator
+#include <thrust/iterator/zip_iterator.h>       // for make_zip_iterator
+#include <thrust/tuple.h>                       // for make_tuple, tuple, tie, get
+
+#include <algorithm>                            // for min
+#include <cassert>                              // for assert
+#include <cmath>                                // for abs, log2, isinf
+#include <cstddef>                              // for size_t
+#include <cstdint>                              // for int32_t
+#include <memory>                               // for shared_ptr
+#include <utility>
+
+#include "../common/algorithm.cuh"       // for SegmentedArgSort
+#include "../common/cuda_context.cuh"    // for CUDAContext
+#include "../common/deterministic.cuh"   // for CreateRoundingFactor, TruncateWithRounding
+#include "../common/device_helpers.cuh"  // for SegmentId, TemporaryArray, AtomicAddGpair
+#include "../common/optional_weight.h"   // for MakeOptionalWeights
+#include "../common/ranking_utils.h"     // for NDCGCache, LambdaRankParam, rel_degree_t
+#include "lambdarank_obj.cuh"
+#include "lambdarank_obj.h"
+#include "xgboost/base.h"                // for bst_group_t, XGBOOST_DEVICE, GradientPair
+#include "xgboost/context.h"             // for Context
+#include "xgboost/data.h"                // for MetaInfo
+#include "xgboost/host_device_vector.h"  // for HostDeviceVector
+#include "xgboost/linalg.h"              // for VectorView, Range, Vector
+#include "xgboost/logging.h"
+#include "xgboost/span.h"                // for Span
+
+namespace xgboost::obj {
+DMLC_REGISTRY_FILE_TAG(lambdarank_obj_cu);
+
+namespace cuda_impl {
+common::Span<std::size_t const> SortY(Context const* ctx, MetaInfo const& info,
+                                      common::Span<std::size_t const> d_rank,
+                                      std::shared_ptr<ltr::RankingCache> p_cache) {
+  auto const d_group_ptr = p_cache->DataGroupPtr(ctx);
+  auto label = info.labels.View(ctx->gpu_id);
+  // The buffer for ranked y is necessary as cub segmented sort accepts only pointer.
+  auto d_y_ranked = p_cache->RankedY(ctx, info.num_row_);
+  thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_y_ranked.size(),
+                     [=] XGBOOST_DEVICE(std::size_t i) {
+                       auto g = dh::SegmentId(d_group_ptr, i);
+                       auto g_label =
+                           label.Slice(linalg::Range(d_group_ptr[g], d_group_ptr[g + 1]), 0);
+                       auto g_rank_idx = d_rank.subspan(d_group_ptr[g], g_label.Size());
+                       i -= d_group_ptr[g];
+                       auto g_y_ranked = d_y_ranked.subspan(d_group_ptr[g], g_label.Size());
+                       g_y_ranked[i] = g_label(g_rank_idx[i]);
+                     });
+  auto d_y_sorted_idx = p_cache->SortedIdxY(ctx, info.num_row_);
+  common::SegmentedArgSort<false, true>(ctx, d_y_ranked, d_group_ptr, d_y_sorted_idx);
+  return d_y_sorted_idx;
+}
+}  // namespace cuda_impl
+}  // namespace xgboost::obj
diff --git a/src/objective/lambdarank_obj.cuh b/src/objective/lambdarank_obj.cuh
@@ -0,0 +1,172 @@
+/**
+ * Copyright 2023 XGBoost contributors
+ */
+#ifndef XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_CUH_
+#define XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_CUH_
+
+#include <thrust/binary_search.h>                      // for lower_bound, upper_bound
+#include <thrust/functional.h>                         // for greater
+#include <thrust/iterator/counting_iterator.h>         // for make_counting_iterator
+#include <thrust/random/linear_congruential_engine.h>  // for minstd_rand
+#include <thrust/random/uniform_int_distribution.h>    // for uniform_int_distribution
+
+#include <cassert>                                     // for cassert
+#include <cstddef>                                     // for size_t
+#include <cstdint>                                     // for int32_t
+#include <tuple>                                       // for make_tuple, tuple
+
+#include "../common/device_helpers.cuh"                // for MakeTransformIterator
+#include "../common/ranking_utils.cuh"                 // for PairsForGroup
+#include "../common/ranking_utils.h"                   // for RankingCache
+#include "../common/threading_utils.cuh"               // for UnravelTrapeziodIdx
+#include "xgboost/base.h"    // for bst_group_t, GradientPair, XGBOOST_DEVICE
+#include "xgboost/data.h"    // for MetaInfo
+#include "xgboost/linalg.h"  // for VectorView, Range, UnravelIndex
+#include "xgboost/span.h"    // for Span
+
+namespace xgboost::obj::cuda_impl {
+/**
+ * \brief Find number of elements left to the label bucket
+ */
+template <typename It, typename T = typename std::iterator_traits<It>::value_type>
+XGBOOST_DEVICE __forceinline__ std::size_t CountNumItemsToTheLeftOf(It items, std::size_t n, T v) {
+  return thrust::lower_bound(thrust::seq, items, items + n, v, thrust::greater<T>{}) - items;
+}
+/**
+ * \brief Find number of elements right to the label bucket
+ */
+template <typename It, typename T = typename std::iterator_traits<It>::value_type>
+XGBOOST_DEVICE __forceinline__ std::size_t CountNumItemsToTheRightOf(It items, std::size_t n, T v) {
+  return n - (thrust::upper_bound(thrust::seq, items, items + n, v, thrust::greater<T>{}) - items);
+}
+/**
+ * \brief Sort labels according to rank list for making pairs.
+ */
+common::Span<std::size_t const> SortY(Context const *ctx, MetaInfo const &info,
+                                      common::Span<std::size_t const> d_rank,
+                                      std::shared_ptr<ltr::RankingCache> p_cache);
+
+/**
+ * \brief Parameters needed for calculating gradient
+ */
+struct KernelInputs {
+  linalg::VectorView<double const> ti_plus;   // input bias ratio
+  linalg::VectorView<double const> tj_minus;  // input bias ratio
+  linalg::VectorView<double> li;
+  linalg::VectorView<double> lj;
+
+  common::Span<bst_group_t const> d_group_ptr;
+  common::Span<std::size_t const> d_threads_group_ptr;
+  common::Span<std::size_t const> d_sorted_idx;
+
+  linalg::MatrixView<float const> labels;
+  common::Span<float const> predts;
+  common::Span<GradientPair> gpairs;
+
+  linalg::VectorView<GradientPair const> d_roundings;
+  double const *d_cost_rounding;
+
+  common::Span<std::size_t const> d_y_sorted_idx;
+
+  std::int32_t iter;
+};
+/**
+ * \brief Functor for generating pairs
+ */
+template <bool has_truncation>
+struct MakePairsOp {
+  KernelInputs args;
+  /**
+   * \brief Make pair for the topk pair method.
+   */
+  XGBOOST_DEVICE std::tuple<std::size_t, std::size_t> WithTruncation(std::size_t idx,
+                                                                     bst_group_t g) const {
+    auto thread_group_begin = args.d_threads_group_ptr[g];
+    auto idx_in_thread_group = idx - thread_group_begin;
+
+    auto data_group_begin = static_cast<std::size_t>(args.d_group_ptr[g]);
+    std::size_t n_data = args.d_group_ptr[g + 1] - data_group_begin;
+    // obtain group segment data.
+    auto g_label = args.labels.Slice(linalg::Range(data_group_begin, data_group_begin + n_data), 0);
+    auto g_sorted_idx = args.d_sorted_idx.subspan(data_group_begin, n_data);
+
+    std::size_t i = 0, j = 0;
+    common::UnravelTrapeziodIdx(idx_in_thread_group, n_data, &i, &j);
+
+    std::size_t rank_high = i, rank_low = j;
+    return std::make_tuple(rank_high, rank_low);
+  }
+  /**
+   * \brief Make pair for the mean pair method
+   */
+  XGBOOST_DEVICE std::tuple<std::size_t, std::size_t> WithSampling(std::size_t idx,
+                                                                   bst_group_t g) const {
+    std::size_t n_samples = args.labels.Size();
+    assert(n_samples == args.predts.size());
+    // Constructed from ranking cache.
+    std::size_t n_pairs =
+        ltr::cuda_impl::PairsForGroup(args.d_threads_group_ptr[g + 1] - args.d_threads_group_ptr[g],
+                                      args.d_group_ptr[g + 1] - args.d_group_ptr[g]);
+
+    assert(n_pairs > 0);
+    auto [sample_idx, sample_pair_idx] = linalg::UnravelIndex(idx, {n_samples, n_pairs});
+
+    auto g_begin = static_cast<std::size_t>(args.d_group_ptr[g]);
+    std::size_t n_data = args.d_group_ptr[g + 1] - g_begin;
+
+    auto g_label = args.labels.Slice(linalg::Range(g_begin, g_begin + n_data));
+    auto g_rank_idx = args.d_sorted_idx.subspan(args.d_group_ptr[g], n_data);
+    auto g_y_sorted_idx = args.d_y_sorted_idx.subspan(g_begin, n_data);
+
+    std::size_t const i = sample_idx - g_begin;
+    assert(sample_pair_idx < n_samples);
+    assert(i <= sample_idx);
+
+    auto g_sorted_label = dh::MakeTransformIterator<float>(
+        thrust::make_counting_iterator(0ul),
+        [&](std::size_t i) { return g_label(g_rank_idx[g_y_sorted_idx[i]]); });
+
+    // Are the labels diverse enough? If they are all the same, then there is nothing to pick
+    // from another group - bail sooner
+    if (g_label.Size() == 0 || g_sorted_label[0] == g_sorted_label[n_data - 1]) {
+      auto z = static_cast<std::size_t>(0ul);
+      return std::make_tuple(z, z);
+    }
+
+    std::size_t n_lefts = CountNumItemsToTheLeftOf(g_sorted_label, i + 1, g_sorted_label[i]);
+    std::size_t n_rights =
+        CountNumItemsToTheRightOf(g_sorted_label + i, n_data - i, g_sorted_label[i]);
+    // The index pointing to the first element of the next bucket
+    std::size_t right_bound = n_data - n_rights;
+
+    thrust::minstd_rand rng(args.iter);
+    auto pair_idx = i;
+    rng.discard(sample_pair_idx * n_data + g + pair_idx);  // fixme
+    thrust::uniform_int_distribution<std::size_t> dist(0, n_lefts + n_rights - 1);
+    auto ridx = dist(rng);
+    SPAN_CHECK(ridx < n_lefts + n_rights);
+    if (ridx >= n_lefts) {
+      ridx = ridx - n_lefts + right_bound;  // fixme
+    }
+
+    auto idx0 = g_y_sorted_idx[pair_idx];
+    auto idx1 = g_y_sorted_idx[ridx];
+
+    return std::make_tuple(idx0, idx1);
+  }
+  /**
+   * \brief Generate a single pair.
+   *
+   * \param idx Pair index (CUDA thread index).
+   * \param g   Query group index.
+   */
+  XGBOOST_DEVICE auto operator()(std::size_t idx, bst_group_t g) const {
+    if (has_truncation) {
+      return this->WithTruncation(idx, g);
+    } else {
+      return this->WithSampling(idx, g);
+    }
+  }
+};
+}  // namespace xgboost::obj::cuda_impl
+#endif  // XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_CUH_
-Original file line number
+Diff line change
@@ Expand Up / @@ -112,7 +112,6 @@ class PerGroupWeightPolicy { @@
         return info.GetWeight(group_id);
       }
     };
     }  // anonymous namespace
     namespace xgboost::metric {
@@ Expand Down @@