From d339e16759f33f12e235a990bc6e4b8f17b0900a Mon Sep 17 00:00:00 2001 From: guolinke Date: Sat, 22 Feb 2020 14:30:55 +0800 Subject: [PATCH 1/6] code refactoring --- src/objective/rank_objective.hpp | 198 +++++++++++++++++++----- src/objective/rank_xendcg_objective.hpp | 141 ----------------- 2 files changed, 155 insertions(+), 184 deletions(-) delete mode 100644 src/objective/rank_xendcg_objective.hpp diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index f564c87926b2..ab60ab0d4b94 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -17,12 +17,73 @@ #include namespace LightGBM { + +/*! + * \brief Objective function for Ranking + */ +class RankingObjective : public ObjectiveFunction { + public: + explicit RankingObjective(const Config&) { + } + + explicit RankingObjective(const std::vector&) {} + + ~RankingObjective() {} + + void Init(const Metadata& metadata, data_size_t num_data) override { + num_data_ = num_data; + // get label + label_ = metadata.label(); + DCGCalculator::CheckLabel(label_, num_data_); + // get weights + weights_ = metadata.weights(); + // get boundries + query_boundaries_ = metadata.query_boundaries(); + if (query_boundaries_ == nullptr) { + Log::Fatal("Ranking tasks require query information"); + } + num_queries_ = metadata.num_queries(); + } + + void GetGradients(const double* score, score_t* gradients, + score_t* hessians) const override { +#pragma omp parallel for schedule(guided) + for (data_size_t i = 0; i < num_queries_; ++i) { + GetGradientsForOneQuery(score, gradients, hessians, i); + } + } + + virtual void GetGradientsForOneQuery(const double* score, score_t* lambdas, + score_t* hessians, + data_size_t query_id) const = 0; + + virtual const char* GetName() const override = 0; + + std::string ToString() const override { + std::stringstream str_buf; + str_buf << GetName(); + return str_buf.str(); + } + + bool NeedAccuratePrediction() const override { return false; } + + protected: + data_size_t num_queries_; + /*! \brief Number of data */ + data_size_t num_data_; + /*! \brief Pointer of label */ + const label_t* label_; + /*! \brief Pointer of weights */ + const label_t* weights_; + /*! \brief Query boundries */ + const data_size_t* query_boundaries_; +}; /*! * \brief Objective function for Lambdrank with NDCG */ -class LambdarankNDCG: public ObjectiveFunction { +class LambdarankNDCG : public RankingObjective { public: - explicit LambdarankNDCG(const Config& config) { + explicit LambdarankNDCG(const Config& config) : RankingObjective(config) { sigmoid_ = static_cast(config.sigmoid); norm_ = config.lambdamart_norm; label_gain_ = config.label_gain; @@ -38,25 +99,14 @@ class LambdarankNDCG: public ObjectiveFunction { } } - explicit LambdarankNDCG(const std::vector&) { - } + explicit LambdarankNDCG(const std::vector& strs) + : RankingObjective(strs) {} ~LambdarankNDCG() { } + void Init(const Metadata& metadata, data_size_t num_data) override { - num_data_ = num_data; - // get label - label_ = metadata.label(); - DCGCalculator::CheckLabel(label_, num_data_); - // get weights - weights_ = metadata.weights(); - // get boundries - query_boundaries_ = metadata.query_boundaries(); - if (query_boundaries_ == nullptr) { - Log::Fatal("Lambdarank tasks require query information"); - } - num_queries_ = metadata.num_queries(); - // cache inverse max DCG, avoid computation many times + RankingObjective::Init(metadata, num_data); inverse_max_dcgs_.resize(num_queries_); #pragma omp parallel for schedule(static) for (data_size_t i = 0; i < num_queries_; ++i) { @@ -72,14 +122,6 @@ class LambdarankNDCG: public ObjectiveFunction { ConstructSigmoidTable(); } - void GetGradients(const double* score, score_t* gradients, - score_t* hessians) const override { - #pragma omp parallel for schedule(guided) - for (data_size_t i = 0; i < num_queries_; ++i) { - GetGradientsForOneQuery(score, gradients, hessians, i); - } - } - inline void GetGradientsForOneQuery(const double* score, score_t* lambdas, score_t* hessians, data_size_t query_id) const { // get doc boundary for current query @@ -212,14 +254,6 @@ class LambdarankNDCG: public ObjectiveFunction { return "lambdarank"; } - std::string ToString() const override { - std::stringstream str_buf; - str_buf << GetName(); - return str_buf.str(); - } - - bool NeedAccuratePrediction() const override { return false; } - private: /*! \brief Gains for labels */ std::vector label_gain_; @@ -231,16 +265,6 @@ class LambdarankNDCG: public ObjectiveFunction { bool norm_; /*! \brief Optimized NDCG@ */ int optimize_pos_at_; - /*! \brief Number of queries */ - data_size_t num_queries_; - /*! \brief Number of data */ - data_size_t num_data_; - /*! \brief Pointer of label */ - const label_t* label_; - /*! \brief Pointer of weights */ - const label_t* weights_; - /*! \brief Query boundries */ - const data_size_t* query_boundaries_; /*! \brief Cache result for sigmoid transform to speed up */ std::vector sigmoid_table_; /*! \brief Number of bins in simoid table */ @@ -253,5 +277,93 @@ class LambdarankNDCG: public ObjectiveFunction { double sigmoid_table_idx_factor_; }; + +/*! + * \brief Implementation of the learning-to-rank objective function, XE_NDCG + * [arxiv.org/abs/1911.09798]. + */ +class RankXENDCG : public RankingObjective { + public: + explicit RankXENDCG(const Config& config) : RankingObjective(config), rand_(config.objective_seed) { + } + + explicit RankXENDCG(const std::vector& strs) + : RankingObjective(strs), rand_() {} + + ~RankXENDCG() {} + + inline void GetGradientsForOneQuery(const double* score, score_t* lambdas, + score_t* hessians, + data_size_t query_id) const { + // get doc boundary for current query + const data_size_t start = query_boundaries_[query_id]; + const data_size_t cnt = + query_boundaries_[query_id + 1] - query_boundaries_[query_id]; + // add pointers with offset + const label_t* label = label_ + start; + score += start; + lambdas += start; + hessians += start; + + // Turn scores into a probability distribution using Softmax. + std::vector rho(cnt); + Common::Softmax(score, rho.data(), cnt); + + // Prepare a vector of gammas, a parameter of the loss. + std::vector gammas(cnt); + for (data_size_t i = 0; i < cnt; ++i) { + gammas[i] = rand_.NextFloat(); + } + + // Skip query if sum of labels is 0. + double sum_labels = 0; + for (data_size_t i = 0; i < cnt; ++i) { + sum_labels += static_cast(phi(label[i], gammas[i])); + } + if (std::fabs(sum_labels) < kEpsilon) { + return; + } + + // Approximate gradients and inverse Hessian. + // First order terms. + std::vector L1s(cnt); + for (data_size_t i = 0; i < cnt; ++i) { + L1s[i] = -phi(label[i], gammas[i]) / sum_labels + rho[i]; + } + // Second-order terms. + std::vector L2s(cnt); + for (data_size_t i = 0; i < cnt; ++i) { + for (data_size_t j = 0; j < cnt; ++j) { + if (i == j) continue; + L2s[i] += L1s[j] / (1 - rho[j]); + } + } + // Third-order terms. + std::vector L3s(cnt); + for (data_size_t i = 0; i < cnt; ++i) { + for (data_size_t j = 0; j < cnt; ++j) { + if (i == j) continue; + L3s[i] += rho[j] * L2s[j] / (1 - rho[j]); + } + } + + // Finally, prepare lambdas and hessians. + for (data_size_t i = 0; i < cnt; ++i) { + lambdas[i] = + static_cast(L1s[i] + rho[i] * L2s[i] + rho[i] * L3s[i]); + hessians[i] = static_cast(rho[i] * (1.0 - rho[i])); + } + } + + double phi(const label_t l, double g) const { + return Common::Pow(2, static_cast(l)) - g; + } + + const char* GetName() const override { return "rank_xendcg"; } + + private: + mutable Random rand_; +}; + } // namespace LightGBM #endif // LightGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_ diff --git a/src/objective/rank_xendcg_objective.hpp b/src/objective/rank_xendcg_objective.hpp deleted file mode 100644 index 1f9d4ae75327..000000000000 --- a/src/objective/rank_xendcg_objective.hpp +++ /dev/null @@ -1,141 +0,0 @@ -/*! - * Copyright (c) 2019 Microsoft Corporation. All rights reserved. - * Licensed under the MIT License. See LICENSE file in the project root for license information. - */ -#ifndef LIGHTGBM_OBJECTIVE_RANK_XENDCG_OBJECTIVE_HPP_ -#define LIGHTGBM_OBJECTIVE_RANK_XENDCG_OBJECTIVE_HPP_ - -#include -#include -#include - -#include -#include - -namespace LightGBM { -/*! -* \brief Implementation of the learning-to-rank objective function, XE_NDCG [arxiv.org/abs/1911.09798]. -*/ -class RankXENDCG: public ObjectiveFunction { - public: - explicit RankXENDCG(const Config& config) { - rand_ = new Random(config.objective_seed); - } - - explicit RankXENDCG(const std::vector&) { - rand_ = new Random(); - } - - ~RankXENDCG() { - } - void Init(const Metadata& metadata, data_size_t) override { - // get label - label_ = metadata.label(); - // get boundries - query_boundaries_ = metadata.query_boundaries(); - if (query_boundaries_ == nullptr) { - Log::Fatal("RankXENDCG tasks require query information"); - } - num_queries_ = metadata.num_queries(); - } - - void GetGradients(const double* score, score_t* gradients, - score_t* hessians) const override { - #pragma omp parallel for schedule(guided) - for (data_size_t i = 0; i < num_queries_; ++i) { - GetGradientsForOneQuery(score, gradients, hessians, i); - } - } - - inline void GetGradientsForOneQuery( - const double* score, - score_t* lambdas, score_t* hessians, data_size_t query_id) const { - // get doc boundary for current query - const data_size_t start = query_boundaries_[query_id]; - const data_size_t cnt = - query_boundaries_[query_id + 1] - query_boundaries_[query_id]; - // add pointers with offset - const label_t* label = label_ + start; - score += start; - lambdas += start; - hessians += start; - - // Turn scores into a probability distribution using Softmax. - std::vector rho(cnt); - Common::Softmax(score, &rho[0], cnt); - - // Prepare a vector of gammas, a parameter of the loss. - std::vector gammas(cnt); - for (data_size_t i = 0; i < cnt; ++i) { - gammas[i] = rand_->NextFloat(); - } - - // Skip query if sum of labels is 0. - float sum_labels = 0; - for (data_size_t i = 0; i < cnt; ++i) { - sum_labels += static_cast(phi(label[i], gammas[i])); - } - if (std::fabs(sum_labels) < kEpsilon) { - return; - } - - // Approximate gradients and inverse Hessian. - // First order terms. - std::vector L1s(cnt); - for (data_size_t i = 0; i < cnt; ++i) { - L1s[i] = -phi(label[i], gammas[i])/sum_labels + rho[i]; - } - // Second-order terms. - std::vector L2s(cnt); - for (data_size_t i = 0; i < cnt; ++i) { - for (data_size_t j = 0; j < cnt; ++j) { - if (i == j) continue; - L2s[i] += L1s[j] / (1 - rho[j]); - } - } - // Third-order terms. - std::vector L3s(cnt); - for (data_size_t i = 0; i < cnt; ++i) { - for (data_size_t j = 0; j < cnt; ++j) { - if (i == j) continue; - L3s[i] += rho[j] * L2s[j] / (1 - rho[j]); - } - } - - // Finally, prepare lambdas and hessians. - for (data_size_t i = 0; i < cnt; ++i) { - lambdas[i] = static_cast( - L1s[i] + rho[i]*L2s[i] + rho[i]*L3s[i]); - hessians[i] = static_cast(rho[i] * (1.0 - rho[i])); - } - } - - double phi(const label_t l, double g) const { - return Common::Pow(2, static_cast(l)) - g; - } - - const char* GetName() const override { - return "rank_xendcg"; - } - - std::string ToString() const override { - std::stringstream str_buf; - str_buf << GetName(); - return str_buf.str(); - } - - bool NeedAccuratePrediction() const override { return false; } - - private: - /*! \brief Number of queries */ - data_size_t num_queries_; - /*! \brief Pointer of label */ - const label_t* label_; - /*! \brief Query boundries */ - const data_size_t* query_boundaries_; - /*! \brief Pseudo-random number generator */ - Random* rand_; -}; - -} // namespace LightGBM -#endif // LightGBM_OBJECTIVE_RANK_XENDCG_OBJECTIVE_HPP_ From df702924213c0142e7e11e5f445d16263626a377 Mon Sep 17 00:00:00 2001 From: guolinke Date: Sat, 22 Feb 2020 14:31:27 +0800 Subject: [PATCH 2/6] update vcproject --- windows/LightGBM.vcxproj | 3 +-- windows/LightGBM.vcxproj.filters | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/windows/LightGBM.vcxproj b/windows/LightGBM.vcxproj index d70248f64178..19d3312e628c 100644 --- a/windows/LightGBM.vcxproj +++ b/windows/LightGBM.vcxproj @@ -243,7 +243,6 @@ - @@ -291,4 +290,4 @@ - + \ No newline at end of file diff --git a/windows/LightGBM.vcxproj.filters b/windows/LightGBM.vcxproj.filters index 4f706fb17c42..aeccbaf84b23 100644 --- a/windows/LightGBM.vcxproj.filters +++ b/windows/LightGBM.vcxproj.filters @@ -84,9 +84,6 @@ src\objective - - src\objective - src\objective @@ -312,4 +309,4 @@ src\io - + \ No newline at end of file From 8a9dd46621f9023e1db30a4535cedc54a4951e48 Mon Sep 17 00:00:00 2001 From: guolinke Date: Sun, 23 Feb 2020 12:21:20 +0800 Subject: [PATCH 3/6] refine --- docs/Parameters.rst | 24 +-- include/LightGBM/config.h | 19 +-- src/io/config_auto.cpp | 22 +-- src/objective/objective_function.cpp | 1 - src/objective/rank_objective.hpp | 227 +++++++++++++-------------- 5 files changed, 143 insertions(+), 150 deletions(-) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 7cd809c3f2be..8011ba729d73 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -99,7 +99,9 @@ Core Parameters - ``lambdarank``, `lambdarank `__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain`` - - ``rank_xendcg``, `XE_NDCG_MART `__ ranking objective function. To obtain reproducible results, you should disable parallelism by setting ``num_threads`` to 1, aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart`` + - ``rank_xendcg``, `XE_NDCG_MART `__ ranking objective function. aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart``. + + - ``rank_xendcg`` is faster than ``lambdarank`` and achieves the similar performance as ``lambdarank`` - label should be ``int`` type, and larger number represents the higher relevance (e.g. 0:bad, 1:fair, 2:good, 3:perfect) @@ -790,6 +792,12 @@ IO Parameters Objective Parameters -------------------- +- ``objective_seed`` :raw-html:`🔗︎`, default = ``5``, type = int + + - random seed for objectives, if random process is needed + + - used in ``rank_xendcg`` + - ``num_class`` :raw-html:`🔗︎`, default = ``1``, type = int, aliases: ``num_classes``, constraints: ``num_class > 0`` - used only in ``multi-class`` classification application @@ -862,19 +870,19 @@ Objective Parameters - set this closer to ``1`` to shift towards a **Poisson** distribution -- ``max_position`` :raw-html:`🔗︎`, default = ``20``, type = int, constraints: ``max_position > 0`` +- ``lambdarank_truncation_level`` :raw-html:`🔗︎`, default = ``20``, type = int, constraints: ``lambdarank_truncation_level > 0`` - used only in ``lambdarank`` application - - optimizes `NDCG `__ at this position + - used for truncating the max_ndcg, refer to "truncation level" in the Sec.3 of `LambdaMART paper `__ . -- ``lambdamart_norm`` :raw-html:`🔗︎`, default = ``true``, type = bool +- ``lambdarank_norm`` :raw-html:`🔗︎`, default = ``true``, type = bool - used only in ``lambdarank`` application - set this to ``true`` to normalize the lambdas for different queries, and improve the performance for unbalanced data - - set this to ``false`` to enforce the original lambdamart algorithm + - set this to ``false`` to enforce the original lambdarank algorithm - ``label_gain`` :raw-html:`🔗︎`, default = ``0,1,3,7,15,31,63,...,2^30-1``, type = multi-double @@ -884,12 +892,6 @@ Objective Parameters - separate by ``,`` -- ``objective_seed`` :raw-html:`🔗︎`, default = ``5``, type = int - - - used only in the ``rank_xendcg`` objective - - - random seed for objectives - Metric Parameters ----------------- diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 88177bac2ee3..d9ccd56537a7 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -127,7 +127,8 @@ struct Config { // descl2 = label is anything in interval [0, 1] // desc = ranking application // descl2 = ``lambdarank``, `lambdarank `__ objective. `label_gain <#label_gain>`__ can be used to set the gain (weight) of ``int`` label and all values in ``label`` must be smaller than number of elements in ``label_gain`` - // descl2 = ``rank_xendcg``, `XE_NDCG_MART `__ ranking objective function. To obtain reproducible results, you should disable parallelism by setting ``num_threads`` to 1, aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart`` + // descl2 = ``rank_xendcg``, `XE_NDCG_MART `__ ranking objective function. aliases: ``xendcg``, ``xe_ndcg``, ``xe_ndcg_mart``, ``xendcg_mart``. + // descl2 = ``rank_xendcg`` is faster than ``lambdarank`` and achieves the similar performance as ``lambdarank`` // descl2 = label should be ``int`` type, and larger number represents the higher relevance (e.g. 0:bad, 1:fair, 2:good, 3:perfect) std::string objective = "regression"; @@ -692,6 +693,10 @@ struct Config { #pragma region Objective Parameters + // desc = random seed for objectives, if random process is needed + // desc = used in ``rank_xendcg`` + int objective_seed = 5; + // check = >0 // alias = num_classes // desc = used only in ``multi-class`` classification application @@ -750,13 +755,13 @@ struct Config { // check = >0 // desc = used only in ``lambdarank`` application - // desc = optimizes `NDCG `__ at this position - int max_position = 20; + // desc = used for truncating the max_ndcg, refer to "truncation level" in the Sec.3 of `LambdaMART paper `__ . + int lambdarank_truncation_level = 20; // desc = used only in ``lambdarank`` application // desc = set this to ``true`` to normalize the lambdas for different queries, and improve the performance for unbalanced data - // desc = set this to ``false`` to enforce the original lambdamart algorithm - bool lambdamart_norm = true; + // desc = set this to ``false`` to enforce the original lambdarank algorithm + bool lambdarank_norm = true; // type = multi-double // default = 0,1,3,7,15,31,63,...,2^30-1 @@ -765,10 +770,6 @@ struct Config { // desc = separate by ``,`` std::vector label_gain; - // desc = used only in the ``rank_xendcg`` objective - // desc = random seed for objectives - int objective_seed = 5; - #pragma endregion #pragma region Metric Parameters diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp index efd6445b8711..8ceaf58d3a64 100644 --- a/src/io/config_auto.cpp +++ b/src/io/config_auto.cpp @@ -257,6 +257,7 @@ const std::unordered_set& Config::parameter_set() { "predict_disable_shape_check", "convert_model_language", "convert_model", + "objective_seed", "num_class", "is_unbalance", "scale_pos_weight", @@ -267,10 +268,9 @@ const std::unordered_set& Config::parameter_set() { "fair_c", "poisson_max_delta_step", "tweedie_variance_power", - "max_position", - "lambdamart_norm", + "lambdarank_truncation_level", + "lambdarank_norm", "label_gain", - "objective_seed", "metric", "metric_freq", "is_provide_training_metric", @@ -513,6 +513,8 @@ void Config::GetMembersFromString(const std::unordered_map0); @@ -541,17 +543,15 @@ void Config::GetMembersFromString(const std::unordered_map=1.0); CHECK(tweedie_variance_power <2.0); - GetInt(params, "max_position", &max_position); - CHECK(max_position >0); + GetInt(params, "lambdarank_truncation_level", &lambdarank_truncation_level); + CHECK(lambdarank_truncation_level >0); - GetBool(params, "lambdamart_norm", &lambdamart_norm); + GetBool(params, "lambdarank_norm", &lambdarank_norm); if (GetString(params, "label_gain", &tmp_str)) { label_gain = Common::StringToArray(tmp_str, ','); } - GetInt(params, "objective_seed", &objective_seed); - GetInt(params, "metric_freq", &metric_freq); CHECK(metric_freq >0); @@ -675,6 +675,7 @@ std::string Config::SaveMembersToString() const { str_buf << "[predict_disable_shape_check: " << predict_disable_shape_check << "]\n"; str_buf << "[convert_model_language: " << convert_model_language << "]\n"; str_buf << "[convert_model: " << convert_model << "]\n"; + str_buf << "[objective_seed: " << objective_seed << "]\n"; str_buf << "[num_class: " << num_class << "]\n"; str_buf << "[is_unbalance: " << is_unbalance << "]\n"; str_buf << "[scale_pos_weight: " << scale_pos_weight << "]\n"; @@ -685,10 +686,9 @@ std::string Config::SaveMembersToString() const { str_buf << "[fair_c: " << fair_c << "]\n"; str_buf << "[poisson_max_delta_step: " << poisson_max_delta_step << "]\n"; str_buf << "[tweedie_variance_power: " << tweedie_variance_power << "]\n"; - str_buf << "[max_position: " << max_position << "]\n"; - str_buf << "[lambdamart_norm: " << lambdamart_norm << "]\n"; + str_buf << "[lambdarank_truncation_level: " << lambdarank_truncation_level << "]\n"; + str_buf << "[lambdarank_norm: " << lambdarank_norm << "]\n"; str_buf << "[label_gain: " << Common::Join(label_gain, ",") << "]\n"; - str_buf << "[objective_seed: " << objective_seed << "]\n"; str_buf << "[metric_freq: " << metric_freq << "]\n"; str_buf << "[is_provide_training_metric: " << is_provide_training_metric << "]\n"; str_buf << "[eval_at: " << Common::Join(eval_at, ",") << "]\n"; diff --git a/src/objective/objective_function.cpp b/src/objective/objective_function.cpp index 4c0a8dc8fbf6..193353d935c3 100644 --- a/src/objective/objective_function.cpp +++ b/src/objective/objective_function.cpp @@ -7,7 +7,6 @@ #include "binary_objective.hpp" #include "multiclass_objective.hpp" #include "rank_objective.hpp" -#include "rank_xendcg_objective.hpp" #include "regression_objective.hpp" #include "xentropy_objective.hpp" diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp index ab60ab0d4b94..f9dfd0a391d8 100644 --- a/src/objective/rank_objective.hpp +++ b/src/objective/rank_objective.hpp @@ -1,6 +1,7 @@ /*! - * Copyright (c) 2016 Microsoft Corporation. All rights reserved. - * Licensed under the MIT License. See LICENSE file in the project root for license information. + * Copyright (c) 2020 Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See LICENSE file in the project root for + * license information. */ #ifndef LIGHTGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_ #define LIGHTGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_ @@ -8,12 +9,12 @@ #include #include -#include -#include #include #include #include #include +#include +#include #include namespace LightGBM { @@ -23,10 +24,10 @@ namespace LightGBM { */ class RankingObjective : public ObjectiveFunction { public: - explicit RankingObjective(const Config&) { - } + explicit RankingObjective(const Config& config) + : seed_(config.objective_seed) {} - explicit RankingObjective(const std::vector&) {} + explicit RankingObjective(const std::vector&) : seed_(0) {} ~RankingObjective() {} @@ -34,7 +35,6 @@ class RankingObjective : public ObjectiveFunction { num_data_ = num_data; // get label label_ = metadata.label(); - DCGCalculator::CheckLabel(label_, num_data_); // get weights weights_ = metadata.weights(); // get boundries @@ -49,13 +49,25 @@ class RankingObjective : public ObjectiveFunction { score_t* hessians) const override { #pragma omp parallel for schedule(guided) for (data_size_t i = 0; i < num_queries_; ++i) { - GetGradientsForOneQuery(score, gradients, hessians, i); + const data_size_t start = query_boundaries_[i]; + const data_size_t cnt = query_boundaries_[i + 1] - query_boundaries_[i]; + GetGradientsForOneQuery(i, cnt, label_ + start, score + start, + gradients + start, hessians + start); + if (weights_ != nullptr) { + for (data_size_t j = 0; j < cnt; ++j) { + gradients[start + j] = + static_cast(gradients[start + j] * weights_[start + j]); + hessians[start + j] = + static_cast(hessians[start + j] * weights_[start + j]); + } + } } } - virtual void GetGradientsForOneQuery(const double* score, score_t* lambdas, - score_t* hessians, - data_size_t query_id) const = 0; + virtual void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt, + const label_t* label, + const double* score, score_t* lambdas, + score_t* hessians) const = 0; virtual const char* GetName() const override = 0; @@ -68,6 +80,7 @@ class RankingObjective : public ObjectiveFunction { bool NeedAccuratePrediction() const override { return false; } protected: + int seed_; data_size_t num_queries_; /*! \brief Number of data */ data_size_t num_data_; @@ -79,19 +92,19 @@ class RankingObjective : public ObjectiveFunction { const data_size_t* query_boundaries_; }; /*! -* \brief Objective function for Lambdrank with NDCG -*/ + * \brief Objective function for Lambdrank with NDCG + */ class LambdarankNDCG : public RankingObjective { public: - explicit LambdarankNDCG(const Config& config) : RankingObjective(config) { - sigmoid_ = static_cast(config.sigmoid); - norm_ = config.lambdamart_norm; + explicit LambdarankNDCG(const Config& config) + : RankingObjective(config), + sigmoid_(config.sigmoid), + norm_(config.lambdarank_norm), + truncation_level_(config.lambdarank_truncation_level) { label_gain_ = config.label_gain; // initialize DCG calculator DCGCalculator::DefaultLabelGain(&label_gain_); DCGCalculator::Init(label_gain_); - // will optimize NDCG@optimize_pos_at_ - optimize_pos_at_ = config.max_position; sigmoid_table_.clear(); inverse_max_dcgs_.clear(); if (sigmoid_ <= 0.0) { @@ -102,17 +115,17 @@ class LambdarankNDCG : public RankingObjective { explicit LambdarankNDCG(const std::vector& strs) : RankingObjective(strs) {} - ~LambdarankNDCG() { - } + ~LambdarankNDCG() {} void Init(const Metadata& metadata, data_size_t num_data) override { RankingObjective::Init(metadata, num_data); + DCGCalculator::CheckLabel(label_, num_data_); inverse_max_dcgs_.resize(num_queries_); #pragma omp parallel for schedule(static) for (data_size_t i = 0; i < num_queries_; ++i) { - inverse_max_dcgs_[i] = DCGCalculator::CalMaxDCGAtK(optimize_pos_at_, - label_ + query_boundaries_[i], - query_boundaries_[i + 1] - query_boundaries_[i]); + inverse_max_dcgs_[i] = DCGCalculator::CalMaxDCGAtK( + truncation_level_, label_ + query_boundaries_[i], + query_boundaries_[i + 1] - query_boundaries_[i]); if (inverse_max_dcgs_[i] > 0.0) { inverse_max_dcgs_[i] = 1.0f / inverse_max_dcgs_[i]; @@ -122,31 +135,25 @@ class LambdarankNDCG : public RankingObjective { ConstructSigmoidTable(); } - inline void GetGradientsForOneQuery(const double* score, - score_t* lambdas, score_t* hessians, data_size_t query_id) const { - // get doc boundary for current query - const data_size_t start = query_boundaries_[query_id]; - const data_size_t cnt = - query_boundaries_[query_id + 1] - query_boundaries_[query_id]; + inline void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt, + const label_t* label, const double* score, + score_t* lambdas, + score_t* hessians) const override { // get max DCG on current query const double inverse_max_dcg = inverse_max_dcgs_[query_id]; - // add pointers with offset - const label_t* label = label_ + start; - score += start; - lambdas += start; - hessians += start; // initialize with zero for (data_size_t i = 0; i < cnt; ++i) { lambdas[i] = 0.0f; hessians[i] = 0.0f; } // get sorted indices for scores - std::vector sorted_idx; + std::vector sorted_idx(cnt); for (data_size_t i = 0; i < cnt; ++i) { - sorted_idx.emplace_back(i); + sorted_idx[i] = i; } - std::stable_sort(sorted_idx.begin(), sorted_idx.end(), - [score](data_size_t a, data_size_t b) { return score[a] > score[b]; }); + std::stable_sort( + sorted_idx.begin(), sorted_idx.end(), + [score](data_size_t a, data_size_t b) { return score[a] > score[b]; }); // get best and worst score const double best_score = score[sorted_idx[0]]; data_size_t worst_idx = cnt - 1; @@ -160,20 +167,25 @@ class LambdarankNDCG : public RankingObjective { const data_size_t high = sorted_idx[i]; const int high_label = static_cast(label[high]); const double high_score = score[high]; - if (high_score == kMinScore) { continue; } + if (high_score == kMinScore) { + continue; + } const double high_label_gain = label_gain_[high_label]; const double high_discount = DCGCalculator::GetDiscount(i); double high_sum_lambda = 0.0; double high_sum_hessian = 0.0; for (data_size_t j = 0; j < cnt; ++j) { // skip same data - if (i == j) { continue; } - + if (i == j) { + continue; + } const data_size_t low = sorted_idx[j]; const int low_label = static_cast(label[low]); const double low_score = score[low]; // only consider pair with different label - if (high_label <= low_label || low_score == kMinScore) { continue; } + if (high_label <= low_label || low_score == kMinScore) { + continue; + } const double delta_score = high_score - low_score; @@ -186,7 +198,7 @@ class LambdarankNDCG : public RankingObjective { // get delta NDCG double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg; // regular the delta_pair_NDCG by score distance - if (norm_ && high_label != low_label && best_score != worst_score) { + if (norm_ && best_score != worst_score) { delta_pair_NDCG /= (0.01f + fabs(delta_score)); } // calculate lambda for this pair @@ -213,25 +225,18 @@ class LambdarankNDCG : public RankingObjective { hessians[i] = static_cast(hessians[i] * norm_factor); } } - // if need weights - if (weights_ != nullptr) { - for (data_size_t i = 0; i < cnt; ++i) { - lambdas[i] = static_cast(lambdas[i] * weights_[start + i]); - hessians[i] = static_cast(hessians[i] * weights_[start + i]); - } - } } - inline double GetSigmoid(double score) const { if (score <= min_sigmoid_input_) { // too small, use lower bound return sigmoid_table_[0]; } else if (score >= max_sigmoid_input_) { - // too big, use upper bound + // too large, use upper bound return sigmoid_table_[_sigmoid_bins - 1]; } else { - return sigmoid_table_[static_cast((score - min_sigmoid_input_) * sigmoid_table_idx_factor_)]; + return sigmoid_table_[static_cast((score - min_sigmoid_input_) * + sigmoid_table_idx_factor_)]; } } @@ -242,7 +247,7 @@ class LambdarankNDCG : public RankingObjective { sigmoid_table_.resize(_sigmoid_bins); // get score to bin factor sigmoid_table_idx_factor_ = - _sigmoid_bins / (max_sigmoid_input_ - min_sigmoid_input_); + _sigmoid_bins / (max_sigmoid_input_ - min_sigmoid_input_); // cache for (size_t i = 0; i < _sigmoid_bins; ++i) { const double score = i / sigmoid_table_idx_factor_ + min_sigmoid_input_; @@ -250,23 +255,20 @@ class LambdarankNDCG : public RankingObjective { } } - const char* GetName() const override { - return "lambdarank"; - } + const char* GetName() const override { return "lambdarank"; } private: - /*! \brief Gains for labels */ - std::vector label_gain_; - /*! \brief Cache inverse max DCG, speed up calculation */ - std::vector inverse_max_dcgs_; /*! \brief Simgoid param */ double sigmoid_; /*! \brief Normalize the lambdas or not */ bool norm_; - /*! \brief Optimized NDCG@ */ - int optimize_pos_at_; + /*! \brief truncation position for max ndcg */ + int truncation_level_; + /*! \brief Cache inverse max DCG, speed up calculation */ + std::vector inverse_max_dcgs_; /*! \brief Cache result for sigmoid transform to speed up */ std::vector sigmoid_table_; + std::vector label_gain_; /*! \brief Number of bins in simoid table */ size_t _sigmoid_bins = 1024 * 1024; /*! \brief Minimal input of sigmoid table */ @@ -277,93 +279,82 @@ class LambdarankNDCG : public RankingObjective { double sigmoid_table_idx_factor_; }; - /*! * \brief Implementation of the learning-to-rank objective function, XE_NDCG * [arxiv.org/abs/1911.09798]. */ class RankXENDCG : public RankingObjective { public: - explicit RankXENDCG(const Config& config) : RankingObjective(config), rand_(config.objective_seed) { - } + explicit RankXENDCG(const Config& config) : RankingObjective(config) {} explicit RankXENDCG(const std::vector& strs) - : RankingObjective(strs), rand_() {} + : RankingObjective(strs) {} ~RankXENDCG() {} - inline void GetGradientsForOneQuery(const double* score, score_t* lambdas, - score_t* hessians, - data_size_t query_id) const { - // get doc boundary for current query - const data_size_t start = query_boundaries_[query_id]; - const data_size_t cnt = - query_boundaries_[query_id + 1] - query_boundaries_[query_id]; - // add pointers with offset - const label_t* label = label_ + start; - score += start; - lambdas += start; - hessians += start; + void Init(const Metadata& metadata, data_size_t num_data) override { + RankingObjective::Init(metadata, num_data); + for (data_size_t i = 0; i < num_queries_; ++i) { + rands_.emplace_back(seed_ + i); + } + } + inline void GetGradientsForOneQuery(data_size_t query_id, data_size_t cnt, + const label_t* label, const double* score, + score_t* lambdas, + score_t* hessians) const override { // Turn scores into a probability distribution using Softmax. - std::vector rho(cnt); + std::vector rho(cnt, 0.0); Common::Softmax(score, rho.data(), cnt); - // Prepare a vector of gammas, a parameter of the loss. - std::vector gammas(cnt); - for (data_size_t i = 0; i < cnt; ++i) { - gammas[i] = rand_.NextFloat(); - } - - // Skip query if sum of labels is 0. + // used for Phi and L1 + std::vector l1s(cnt); double sum_labels = 0; for (data_size_t i = 0; i < cnt; ++i) { - sum_labels += static_cast(phi(label[i], gammas[i])); - } - if (std::fabs(sum_labels) < kEpsilon) { - return; + l1s[i] = Phi(label[i], rands_[query_id].NextFloat()); + sum_labels += l1s[i]; } - + // sum_labels will always be positive number + sum_labels = std::max(kEpsilon, sum_labels); // Approximate gradients and inverse Hessian. // First order terms. - std::vector L1s(cnt); + double sum_l1 = 0.0f; for (data_size_t i = 0; i < cnt; ++i) { - L1s[i] = -phi(label[i], gammas[i]) / sum_labels + rho[i]; + l1s[i] = -l1s[i] / sum_labels + rho[i]; + sum_l1 += l1s[i]; } - // Second-order terms. - std::vector L2s(cnt); - for (data_size_t i = 0; i < cnt; ++i) { - for (data_size_t j = 0; j < cnt; ++j) { - if (i == j) continue; - L2s[i] += L1s[j] / (1 - rho[j]); + if (cnt <= 1) { + // when cnt <= 1, the l2 and l3 are zeros + for (data_size_t i = 0; i < cnt; ++i) { + lambdas[i] = static_cast(l1s[i]); + hessians[i] = static_cast(rho[i] * (1.0 - rho[i])); } - } - // Third-order terms. - std::vector L3s(cnt); - for (data_size_t i = 0; i < cnt; ++i) { - for (data_size_t j = 0; j < cnt; ++j) { - if (i == j) continue; - L3s[i] += rho[j] * L2s[j] / (1 - rho[j]); + } else { + // Second order terms. + std::vector l2s(cnt, 0.0); + double sum_l2 = 0.0; + for (data_size_t i = 0; i < cnt; ++i) { + l2s[i] = (sum_l1 - l1s[i]) / (1 - rho[i]); + sum_l2 += l2s[i]; + } + for (data_size_t i = 0; i < cnt; ++i) { + auto l3 = (sum_l2 - l2s[i]) / (1 - rho[i]); + lambdas[i] = static_cast(l1s[i] + rho[i] * l2s[i] + + rho[i] * rho[i] * l3); + hessians[i] = static_cast(rho[i] * (1.0 - rho[i])); } - } - - // Finally, prepare lambdas and hessians. - for (data_size_t i = 0; i < cnt; ++i) { - lambdas[i] = - static_cast(L1s[i] + rho[i] * L2s[i] + rho[i] * L3s[i]); - hessians[i] = static_cast(rho[i] * (1.0 - rho[i])); } } - double phi(const label_t l, double g) const { + double Phi(const label_t l, double g) const { return Common::Pow(2, static_cast(l)) - g; } const char* GetName() const override { return "rank_xendcg"; } private: - mutable Random rand_; + mutable std::vector rands_; }; } // namespace LightGBM -#endif // LightGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_ +#endif // LightGBM_OBJECTIVE_RANK_OBJECTIVE_HPP_ From 1d2453c9a09456f01a7ee1b62b50facfcf677f70 Mon Sep 17 00:00:00 2001 From: guolinke Date: Sun, 23 Feb 2020 13:07:33 +0800 Subject: [PATCH 4/6] fix test --- tests/python_package_test/test_sklearn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 838c6b36651a..5a15e0cdf0a6 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -129,7 +129,7 @@ def test_xendcg(self): eval_metric='ndcg', callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) self.assertLessEqual(gbm.best_iteration_, 24) - self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6559) + self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6382) self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6421) def test_regression_with_custom_objective(self): From 7f2e30ba3683097fc3b974766c008713bbd1afef Mon Sep 17 00:00:00 2001 From: Guolin Ke Date: Sun, 23 Feb 2020 17:23:18 +0800 Subject: [PATCH 5/6] Update tests/python_package_test/test_sklearn.py --- tests/python_package_test/test_sklearn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py index 5a15e0cdf0a6..12ea27d922c7 100644 --- a/tests/python_package_test/test_sklearn.py +++ b/tests/python_package_test/test_sklearn.py @@ -130,7 +130,7 @@ def test_xendcg(self): callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))]) self.assertLessEqual(gbm.best_iteration_, 24) self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6382) - self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6421) + self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6319) def test_regression_with_custom_objective(self): X, y = load_boston(True) From 2376a9e4ff85616ee2e218973f713d60d4aaf2d8 Mon Sep 17 00:00:00 2001 From: guolinke Date: Sun, 23 Feb 2020 17:41:19 +0800 Subject: [PATCH 6/6] fix test --- R-package/tests/testthat/test_learning_to_rank.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R-package/tests/testthat/test_learning_to_rank.R b/R-package/tests/testthat/test_learning_to_rank.R index 05193de729e2..3cab22304e6b 100644 --- a/R-package/tests/testthat/test_learning_to_rank.R +++ b/R-package/tests/testthat/test_learning_to_rank.R @@ -20,7 +20,7 @@ test_that("learning-to-rank with lgb.train() works as expected", { objective = "lambdarank" , metric = "ndcg" , ndcg_at = ndcg_at - , max_position = 3L + , lambdarank_truncation_level = 3L , learning_rate = 0.001 ) model <- lgb.train( @@ -67,7 +67,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", { objective = "lambdarank" , metric = "ndcg" , ndcg_at = ndcg_at - , max_position = 3L + , lambdarank_truncation_level = 3L , label_gain = "0,1,3" ) nfold <- 4L