From 436921a4f674aaccb5fbccdcccd868ea1231bcda Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Mon, 5 Jul 2021 15:57:23 +0800 Subject: [PATCH 1/3] FEA: add Coverage and GiniIndex metric --- recbole/evaluator/metrics.py | 89 +++++++++++++++++++++++++++++- recbole/evaluator/register.py | 2 +- tests/metrics/test_topk_metrics.py | 27 +++++++-- 3 files changed, 111 insertions(+), 7 deletions(-) diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index d5e74914b..009b5f835 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -4,7 +4,7 @@ # @email : tsotfsk@outlook.com # UPDATE -# @Time : 2020/08/12, 2021/6/25, 2020/9/16, 2021/7/2 +# @Time : 2020/08/12, 2021/7/5, 2020/9/16, 2021/7/2 # @Author : Kaiyuan Li, Zhichao Feng, Xingyu Pan, Zihan Lin # @email : tsotfsk@outlook.com, fzcbupt@gmail.com, panxy@ruc.edu.cn, zhlin@ruc.edu.cn @@ -151,6 +151,7 @@ class NDCG(TopkMetric): \mathrm {NDCG_u@K}=\frac{DCG_u@K}{IDCG_u@K}\\ \mathrm {NDCG@K}=\frac{\sum \nolimits_{u \in U^{te}NDCG_u@K}}{|U^{te}|} \end{gather} + :math:`K` stands for recommending :math:`K` items. And the :math:`rel_i` is the relevance of the item in position :math:`i` in the recommendation list. :math:`{rel_i}` equals to 1 if the item is ground truth otherwise 0. @@ -387,6 +388,46 @@ def metric_info(self, preds, trues): return loss / len(preds) +class ItemCoverage(object): + r"""It computes the coverage of recommended items over all items. + + For further details, please refer to the `paper ` and + `paper `_ + + .. math:: + \mathrm{Coverage}=\frac{\left| \bigcup_{u \in U} \hat{R}(u) \right|}{|I|} + + :math:`U` is the total user set. + + :math:`\hat{R}(u)` is the recommended list of items for user :math:`u`. + + :math:`I` is the total item set. 
+ """ + + def __init__(self, config): + self.topk = config['topk'] + self.decimal_place = config['metric_decimal_place'] + + def used_info(self, dataobject): + """get the matrix of recommendation items and number of items in total item set""" + item_matrix = dataobject.get('rec.items') + num_items = dataobject.get('data.num_items') + return item_matrix.numpy(), num_items + + def calculate_metric(self, dataobject): + item_matrix, num_items = self.used_info(dataobject) + metric_dict = {} + for k in self.topk: + key = '{}@{}'.format('itemcoverage', k) + metric_dict[key] = round(self.get_coverage(item_matrix[:, :k], num_items), self.decimal_place) + return metric_dict + + def get_coverage(self, item_matrix, num_items): + """get the coverage of recommended items over all items""" + unique_count = np.unique(item_matrix).shape[0] + return unique_count / num_items + + class AveragePopularity: r"""It computes the average popularity of recommended items. @@ -485,6 +526,50 @@ def get_entropy(self, item_matrix): return result/len(item_count) +class GiniIndex(object): + r"""This metric presents the diversity of the recommendation items. + It is used to measure the inequality of a distribution. + + For further details, please refer to the `paper ` + + .. math:: + \mathrm {GiniIndex}=\left(\frac{\sum_{i=1}^{n}(2 i-n-1) P_{(i)}}{n \sum_{i=1}^{n} P_{(i)}}\right) + + :math:`n` is the number of all items. + :math:`P_{(i)}` is the number of times each item appears in the recommended lists, + which is indexed in non-decreasing order (:math:`P_{(i)} \leq P_{(i+1)}`). 
+ """ + + def __init__(self, config): + self.topk = config['topk'] + self.decimal_place = config['metric_decimal_place'] + + def used_info(self, dataobject): + """get the matrix of recommendation items and number of items in total item set""" + item_matrix = dataobject.get('rec.items') + num_items = dataobject.get('data.num_items') + return item_matrix.numpy(), num_items + + def calculate_metric(self, dataobject): + item_matrix, num_items = self.used_info(dataobject) + metric_dict = {} + for k in self.topk: + key = '{}@{}'.format('giniindex', k) + metric_dict[key] = round(self.get_gini(item_matrix[:, :k], num_items), self.decimal_place) + return metric_dict + + def get_gini(self, item_matrix, num_items): + """get gini index""" + item_count = dict(Counter(item_matrix.flatten())) + sorted_count = np.array(sorted(item_count.values())) + num_recommended_items = sorted_count.shape[0] + total_num = item_matrix.shape[0] * item_matrix.shape[1] + idx = np.arange(num_items - num_recommended_items + 1, num_items + 1) + gini_index = np.sum((2 * idx - num_items - 1) * sorted_count) / total_num + gini_index /= num_items + return gini_index + + metrics_dict = { 'ndcg': NDCG, 'hit': Hit, @@ -497,6 +582,8 @@ def get_entropy(self, item_matrix): 'logloss': LogLoss, 'auc': AUC, 'gauc': GAUC, + 'itemcoverage': ItemCoverage, 'averagepopularity': AveragePopularity, + 'giniindex': GiniIndex, 'shannonentropy': ShannonEntropy } diff --git a/recbole/evaluator/register.py b/recbole/evaluator/register.py index ff535aef3..0c4f636f8 100644 --- a/recbole/evaluator/register.py +++ b/recbole/evaluator/register.py @@ -31,7 +31,7 @@ 'logloss': ['rec.score', 'data.label']} # These metrics are typical in top-k recommendations topk_metrics = {metric.lower(): metric for metric in ['Hit', 'Recall', 'MRR', 'Precision', 'NDCG', 'MAP', - 'AveragePopularity', 'ShannonEntropy']} + 'ItemCoverage', 'AveragePopularity', 'ShannonEntropy', 'GiniIndex']} # These metrics are typical in loss recommendations 
loss_metrics = {metric.lower(): metric for metric in ['AUC', 'RMSE', 'MAE', 'LOGLOSS']} # For GAUC diff --git a/tests/metrics/test_topk_metrics.py b/tests/metrics/test_topk_metrics.py index ed1718ce4..87557354e 100644 --- a/tests/metrics/test_topk_metrics.py +++ b/tests/metrics/test_topk_metrics.py @@ -4,9 +4,9 @@ # @email : tsotfsk@outlook.com # UPDATE -# @Time : 2021/7/2 -# @Author : Zihan Lin -# @email : zhlin@ruc.edu.cn +# @Time : 2021/7/2, 2021/7/5 +# @Author : Zihan Lin, Zhichao Feng +# @email : zhlin@ruc.edu.cn, fzcbupt@gmail.com import os import sys @@ -39,6 +39,8 @@ [5, 3, 7] ]) +num_items = 8 + item_count = {1: 0, 2: 1, 3: 2, @@ -105,6 +107,13 @@ def test_precision(self): np.array([[0, 0, 0], [1 / 1, 2 / 2, 3 / 3], [1 / 1, 1 / 2, 2 / 3], [0, 0, 1 / 3]]).tolist()) + def test_itemcoverage(self): + name = 'itemcoverage' + Metric = metrics_dict[name](config) + self.assertEqual( + Metric.get_coverage(item_matrix, num_items), + 7 / 8) + def test_averagepopularity(self): name = 'averagepopularity' Metric = metrics_dict[name](config) @@ -113,13 +122,21 @@ def test_averagepopularity(self): np.array([[4/1, 4/2, 6/3], [3/1, 7/2, 8/3], [1/1, 3/2, 7/3], [0/1, 3/2, 8/3], [4/1, 6/2, 6/3]]).tolist()) - def test_ShannonEntropy(self): + def test_giniindex(self): + name = 'giniindex' + Metric = metrics_dict[name](config) + self.assertEqual( + Metric.get_gini(item_matrix, num_items), + ((-7) * 0 + (-5) * 1 + (-3) * 1 + (-1) * 2 + 1 * 2 + 3 * 2 + 5 * 3 + 7 * 4) + / (8 * (3 * 5))) + + def test_shannonentropy(self): name = 'shannonentropy' Metric = metrics_dict[name](config) self.assertEqual( Metric.get_entropy(item_matrix), -np.mean([1/15*np.log(1/15), 2/15*np.log(2/15), 3/15*np.log(3/15), 2/15*np.log(2/15), - 4/15*np.log(4/15), 1/15*np.log(1/15), 2/15*np.log(2/15)])) + 4/15*np.log(4/15), 1/15*np.log(1/15), 2/15*np.log(2/15)])) if __name__ == "__main__": From c68e5ce196fafb15c0db1bc09ed8d56999a0ba4f Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Mon, 5 Jul 
2021 15:58:42 +0800 Subject: [PATCH 2/3] FEA: change config for new metrics --- recbole/config/configurator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recbole/config/configurator.py b/recbole/config/configurator.py index 854b42dc1..c71435c2a 100644 --- a/recbole/config/configurator.py +++ b/recbole/config/configurator.py @@ -298,7 +298,7 @@ def _set_default_parameters(self): eval_type = EvaluatorType.RANKING self.final_config_dict['eval_type'] = eval_type - smaller_metric = ['rmse', 'mae', 'logloss'] + smaller_metric = ['rmse', 'mae', 'logloss', 'averagepopularity', 'giniindex'] valid_metric = self.final_config_dict['valid_metric'].split('@')[0] self.final_config_dict['valid_metric_bigger'] = False if valid_metric.lower() in smaller_metric else True From b2663fe8599476476cd5a3e4306c86a37d924b0d Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Mon, 5 Jul 2021 16:11:43 +0800 Subject: [PATCH 3/3] FIX: update date --- recbole/evaluator/register.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/recbole/evaluator/register.py b/recbole/evaluator/register.py index 0c4f636f8..f3c2ed839 100644 --- a/recbole/evaluator/register.py +++ b/recbole/evaluator/register.py @@ -2,6 +2,11 @@ # @Author : Zihan Lin # @Email : zhlin@ruc.edu.cn +# UPDATE +# @Time : 2021/7/5 +# @Author : Zhichao Feng +# @email : fzcbupt@gmail.com + """ recbole.evaluator.register ################################################