
Add support to optimize for NDCG at a given truncation level #3425

Merged (15 commits) on Oct 27, 2020
18 changes: 9 additions & 9 deletions R-package/tests/testthat/test_learning_to_rank.R
@@ -45,9 +45,9 @@ test_that("learning-to-rank with lgb.train() works as expected", {
expect_identical(result[["data_name"]], "training")
}
expect_identical(sapply(eval_results, function(x) {x$name}), eval_names)
expect_equal(eval_results[[1L]][["value"]], 0.825)
expect_true(abs(eval_results[[2L]][["value"]] - 0.7766434) < TOLERANCE)
expect_true(abs(eval_results[[3L]][["value"]] - 0.7527939) < TOLERANCE)
expect_equal(eval_results[[1L]][["value"]], 0.775)
expect_true(abs(eval_results[[2L]][["value"]] - 0.745986) < TOLERANCE)
expect_true(abs(eval_results[[3L]][["value"]] - 0.7351959) < TOLERANCE)
})

test_that("learning-to-rank with lgb.cv() works as expected", {
@@ -93,7 +93,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
best_score <- cv_bst$best_score
expect_true(best_iter > 0L && best_iter <= nrounds)
expect_true(best_score > 0.0 && best_score < 1.0)
expect_true(abs(best_score - 0.775) < TOLERANCE)
expect_true(abs(best_score - 0.75) < TOLERANCE)

# best_score should be set for the first metric
first_metric <- eval_names[[1L]]
@@ -115,18 +115,18 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
}

# first and last value of each metric should be as expected
ndcg1_values <- c(0.725, 0.75, 0.75, 0.775, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75)
ndcg1_values <- c(0.675, 0.725, 0.65, 0.725, 0.75, 0.725, 0.75, 0.725, 0.75, 0.75)
expect_true(all(abs(unlist(eval_results[["ndcg@1"]][["eval"]]) - ndcg1_values) < TOLERANCE))

ndcg2_values <- c(
0.6863147, 0.720986, 0.7306574, 0.745986, 0.7306574,
0.720986, 0.7403287, 0.7403287, 0.7403287, 0.7306574
0.6556574, 0.6669721, 0.6306574, 0.6476294, 0.6629581,
0.6476294, 0.6629581, 0.6379581, 0.7113147, 0.6823008
)
expect_true(all(abs(unlist(eval_results[["ndcg@2"]][["eval"]]) - ndcg2_values) < TOLERANCE))

ndcg3_values <- c(
0.6777939, 0.6984639, 0.711732, 0.7234639, 0.711732,
0.7101959, 0.719134, 0.719134, 0.725, 0.711732
0.6484639, 0.6571238, 0.6469279, 0.6540516, 0.6481857,
0.6481857, 0.6481857, 0.6466496, 0.7027939, 0.6629898
)
expect_true(all(abs(unlist(eval_results[["ndcg@3"]][["eval"]]) - ndcg3_values) < TOLERANCE))
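The expected values in these tests follow the NDCG convention LightGBM's `ndcg` metric uses (gain `2^label - 1`, discount `1 / log2(rank + 2)`). As a hedged, self-contained sketch of that computation (edge-case handling for queries with no relevant documents may differ from the real implementation):

```python
import math

def ndcg_at_k(ranked_labels, k):
    """NDCG@k with gain 2^label - 1 and 1/log2(rank + 2) discount.

    Illustrative sketch only; `ranked_labels` are the integer relevance
    labels in the order the model ranked the documents.
    """
    def dcg(labels):
        return sum(((1 << label) - 1) / math.log2(rank + 2.0)
                   for rank, label in enumerate(labels[:k]))

    ideal = dcg(sorted(ranked_labels, reverse=True))
    return dcg(ranked_labels) / ideal if ideal > 0 else 1.0
```

A perfectly ordered query scores 1.0 at any cutoff; placing the only relevant document below the cutoff scores 0.0.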

6 changes: 4 additions & 2 deletions docs/Parameters.rst
@@ -951,11 +951,13 @@ Objective Parameters

- set this closer to ``1`` to shift towards a **Poisson** distribution

- ``lambdarank_truncation_level`` :raw-html:`<a id="lambdarank_truncation_level" title="Permalink to this parameter" href="#lambdarank_truncation_level">&#x1F517;&#xFE0E;</a>`, default = ``20``, type = int, constraints: ``lambdarank_truncation_level > 0``
- ``lambdarank_truncation_level`` :raw-html:`<a id="lambdarank_truncation_level" title="Permalink to this parameter" href="#lambdarank_truncation_level">&#x1F517;&#xFE0E;</a>`, default = ``30``, type = int, constraints: ``lambdarank_truncation_level > 0``

- used only in ``lambdarank`` application

- used for truncating the max DCG, refer to "truncation level" in the Sec. 3 of `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__
- controls the number of top results to focus on during training; refer to "truncation level" in Sec. 3 of the `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__

- closely related to the cutoff ``k`` of the target metric NDCG@k. The optimal setting is usually slightly larger than ``k`` (e.g., ``k + 3``), so that more document pairs contribute to training, but not so large that training drifts away from the target metric NDCG@k
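To make the k-versus-truncation-level trade-off concrete, here is a small illustrative helper (hypothetical, not part of LightGBM) counting how many document pairs per query survive truncation, assuming every counted pair must contain at least one document ranked above the cutoff:

```python
def truncated_pair_count(n_docs, truncation_level):
    """Pairs per query when the outer loop over ranked positions is
    limited to the top `truncation_level` ranks (illustrative sketch).
    """
    top = min(truncation_level, n_docs - 1)
    # position i pairs with every lower-ranked position j > i
    return sum(n_docs - 1 - i for i in range(top))
```

For a 100-document query, truncation level 13 (the ``k + 3`` rule of thumb for NDCG@10) keeps 1209 of the full 4950 pairs.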

- ``lambdarank_norm`` :raw-html:`<a id="lambdarank_norm" title="Permalink to this parameter" href="#lambdarank_norm">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool

5 changes: 3 additions & 2 deletions include/LightGBM/config.h
@@ -835,8 +835,9 @@ struct Config {

// check = >0
// desc = used only in ``lambdarank`` application
// desc = used for truncating the max DCG, refer to "truncation level" in the Sec. 3 of `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__
int lambdarank_truncation_level = 20;
// desc = controls the number of top results to focus on during training; refer to "truncation level" in Sec. 3 of the `LambdaMART paper <https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/MSR-TR-2010-82.pdf>`__
// desc = closely related to the cutoff ``k`` of the target metric NDCG@k. The optimal setting is usually slightly larger than ``k`` (e.g., ``k + 3``), so that more document pairs contribute to training, but not so large that training drifts away from the target metric NDCG@k
int lambdarank_truncation_level = 30;

// desc = used only in ``lambdarank`` application
// desc = set this to ``true`` to normalize the lambdas for different queries, and improve the performance for unbalanced data
52 changes: 24 additions & 28 deletions src/objective/rank_objective.hpp
@@ -163,35 +163,34 @@ class LambdarankNDCG : public RankingObjective {
}
const double worst_score = score[sorted_idx[worst_idx]];
double sum_lambdas = 0.0;
// start accumulating lambdas by pairs
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t high = sorted_idx[i];
const int high_label = static_cast<int>(label[high]);
const double high_score = score[high];
if (high_score == kMinScore) {
continue;
}
const double high_label_gain = label_gain_[high_label];
const double high_discount = DCGCalculator::GetDiscount(i);
double high_sum_lambda = 0.0;
double high_sum_hessian = 0.0;
for (data_size_t j = 0; j < cnt; ++j) {
// skip same data
if (i == j) {
continue;
// start accumulating lambdas over pairs that contain at least one document above the truncation level
for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) {
if (score[sorted_idx[i]] == kMinScore) { continue; }
for (data_size_t j = i + 1; j < cnt; ++j) {
@guolinke (Collaborator, Oct 9, 2020): Isn't this start from '0'? If not, why?
Updated: I see. Never mind.
metpavel marked this conversation as resolved.
if (score[sorted_idx[j]] == kMinScore) { continue; }
// skip pairs with the same labels
if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; }
data_size_t high_rank, low_rank;
if (label[sorted_idx[i]] > label[sorted_idx[j]]) {
high_rank = i;
low_rank = j;
} else {
high_rank = j;
low_rank = i;
}
const data_size_t low = sorted_idx[j];
const data_size_t high = sorted_idx[high_rank];
const int high_label = static_cast<int>(label[high]);
const double high_score = score[high];
const double high_label_gain = label_gain_[high_label];
const double high_discount = DCGCalculator::GetDiscount(high_rank);
const data_size_t low = sorted_idx[low_rank];
const int low_label = static_cast<int>(label[low]);
const double low_score = score[low];
// only consider pair with different label
if (high_label <= low_label || low_score == kMinScore) {
continue;
}
const double low_label_gain = label_gain_[low_label];
const double low_discount = DCGCalculator::GetDiscount(low_rank);

const double delta_score = high_score - low_score;

const double low_label_gain = label_gain_[low_label];
const double low_discount = DCGCalculator::GetDiscount(j);
// get dcg gap
const double dcg_gap = high_label_gain - low_label_gain;
// get discount of this pair
@@ -208,16 +207,13 @@
// update
p_lambda *= -sigmoid_ * delta_pair_NDCG;
p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG;
high_sum_lambda += p_lambda;
high_sum_hessian += p_hessian;
lambdas[low] -= static_cast<score_t>(p_lambda);
hessians[low] += static_cast<score_t>(p_hessian);
lambdas[high] += static_cast<score_t>(p_lambda);
hessians[high] += static_cast<score_t>(p_hessian);
// lambda is negative, so use minus to accumulate
sum_lambdas -= 2 * p_lambda;
}
// update
lambdas[high] += static_cast<score_t>(high_sum_lambda);
hessians[high] += static_cast<score_t>(high_sum_hessian);
}
if (norm_ && sum_lambdas > 0) {
double norm_factor = std::log2(1 + sum_lambdas) / sum_lambdas;
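The new truncated loop can be mirrored in a pure-Python sketch (illustrative only: the helper name is hypothetical, and it omits the max-DCG normalization, LightGBM's sigmoid lookup table, and the `norm_` rescaling that follow in the real code):

```python
import math

def truncated_lambdarank(labels, scores, truncation_level, sigmoid=2.0):
    """Sketch of truncated LambdaRank lambda/hessian accumulation.

    The outer index runs only over the top `truncation_level` ranked
    positions, so every pair contains at least one document above the
    cutoff, matching the loop shape in this PR.
    """
    n = len(labels)
    sorted_idx = sorted(range(n), key=lambda d: -scores[d])
    lambdas, hessians = [0.0] * n, [0.0] * n

    def discount(rank):   # position discount 1 / log2(rank + 2)
        return 1.0 / math.log2(rank + 2.0)

    def gain(label):      # label gain 2^label - 1
        return (1 << label) - 1

    for i in range(min(truncation_level, n - 1)):
        for j in range(i + 1, n):
            a, b = sorted_idx[i], sorted_idx[j]
            if labels[a] == labels[b]:
                continue  # pairs with equal labels carry no signal
            # orient the pair so `high` is the better-labeled document
            if labels[a] > labels[b]:
                high, high_rank, low, low_rank = a, i, b, j
            else:
                high, high_rank, low, low_rank = b, j, a, i
            delta_ndcg = abs((gain(labels[high]) - gain(labels[low]))
                             * (discount(high_rank) - discount(low_rank)))
            delta_score = scores[high] - scores[low]
            p = 1.0 / (1.0 + math.exp(sigmoid * delta_score))
            p_lambda = -sigmoid * delta_ndcg * p        # negative: push `high` up
            p_hessian = sigmoid * sigmoid * delta_ndcg * p * (1.0 - p)
            lambdas[high] += p_lambda
            lambdas[low] -= p_lambda
            hessians[high] += p_hessian
            hessians[low] += p_hessian
    return lambdas, hessians
```

Because each pair adds `p_lambda` to one document and subtracts it from the other, the lambdas of a query sum to zero, while hessians stay non-negative.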
Collaborator: /gha run-valgrind

Collaborator: /gha run r-valgrind

Collaborator: ^ this comment started this run checking that these changes pass our valgrind tests: https://github.com/microsoft/LightGBM/runs/1296338580?check_suite_focus=true

Collaborator: this didn't show any new issues...but I'll run it again once my fixes are merged into this PR: #3425 (comment)

Collaborator: /gha run-valgrind

Collaborator: /gha run r-valgrind
4 changes: 2 additions & 2 deletions tests/python_package_test/test_sklearn.py
@@ -119,8 +119,8 @@ def test_lambdarank(self):
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
self.assertLessEqual(gbm.best_iteration_, 24)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.5769)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.5920)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.5674)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.578)

def test_xendcg(self):
dir_path = os.path.dirname(os.path.realpath(__file__))