From 0772227c2e5f5899f394073c6a3c27b804b555d8 Mon Sep 17 00:00:00 2001 From: chenyushuo <297086016@qq.com> Date: Fri, 18 Dec 2020 10:54:56 +0800 Subject: [PATCH 01/22] FEA: add config['benchmark_filename'] to load pre-split dataset; increased the robustness of data.utils.data_preparation, which can divide the dataset into two parts (train, test) or three parts (train, valid, test). --- recbole/data/dataset/dataset.py | 5 +++++ recbole/data/utils.py | 24 +++++++++++++++++------- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/recbole/data/dataset/dataset.py b/recbole/data/dataset/dataset.py index d0bcbce25..3aef268be 100644 --- a/recbole/data/dataset/dataset.py +++ b/recbole/data/dataset/dataset.py @@ -1340,6 +1340,11 @@ def build(self, eval_setting): Returns: list: List of builded :class:`Dataset`. """ + if self.benchmark_filename_list is not None: + cumsum = list(np.cumsum(self.file_size_list)) + datasets = [self.copy(self.inter_feat[start: end]) for start, end in zip([0] + cumsum[:-1], cumsum)] + return datasets + ordering_args = eval_setting.ordering_args if ordering_args['strategy'] == 'shuffle': self.shuffle() diff --git a/recbole/data/utils.py b/recbole/data/utils.py index f6e649daf..f9c4ed06a 100644 --- a/recbole/data/utils.py +++ b/recbole/data/utils.py @@ -86,8 +86,15 @@ def data_preparation(config, dataset, save=False): raise ValueError('Sequential models require "loo" split strategy.') builded_datasets = dataset.build(es) - train_dataset, valid_dataset, test_dataset = builded_datasets - phases = ['train', 'valid', 'test'] + if len(builded_datasets) not in {2, 3}: + raise ValueError('Dataset should only be divided into two or three parts.') + else: + train_dataset = builded_datasets[0] + evaluation_datasets = builded_datasets[1:] + if len(builded_datasets) == 2: + phases = ['train', 'test'] + else: + phases = ['train', 'valid', 'test'] if save: save_datasets(config['checkpoint_dir'], name=phases, dataset=builded_datasets) @@ -100,7 +107,7 @@ def data_preparation(config, dataset, save=False): sampler = Sampler(phases, builded_datasets, es.neg_sample_args['distribution']) else: sampler = RepeatableSampler(phases, dataset, es.neg_sample_args['distribution']) - kwargs['sampler'] = sampler.set_phase('train') + kwargs['sampler'] = sampler.set_phase(phases[0]) kwargs['neg_sample_args'] = copy.deepcopy(es.neg_sample_args) if model_type == ModelType.KNOWLEDGE: kg_sampler = KGSampler(dataset, es.neg_sample_args['distribution']) @@ -122,18 +129,21 @@ def data_preparation(config, dataset, save=False): if 'sampler' not in locals(): sampler = Sampler(phases, builded_datasets, es.neg_sample_args['distribution']) sampler.set_distribution(es.neg_sample_args['distribution']) - kwargs['sampler'] = [sampler.set_phase('valid'), sampler.set_phase('test')] + kwargs['sampler'] = [sampler.set_phase(phase) for phase in phases[1:]] kwargs['neg_sample_args'] = copy.deepcopy(es.neg_sample_args) - valid_data, test_data = dataloader_construct( + evaluation_data = dataloader_construct( name='evaluation', config=config, eval_setting=es, - dataset=[valid_dataset, test_dataset], + dataset=evaluation_datasets, batch_size=config['eval_batch_size'], **kwargs ) - return train_data, valid_data, test_data + if len(builded_datasets) == 2: + return train_data, None, evaluation_data + else: + return [train_data] + evaluation_data def dataloader_construct(name, config, eval_setting, dataset, From 3769b2d48a6ac6ca1000fa88ed0dfd617c588c8d Mon Sep 17 00:00:00 2001 From: chenyushuo <297086016@qq.com> Date: Fri, 
18 Dec 2020 10:56:25 +0800 Subject: [PATCH 02/22] FIX: Increased the robustness of GeneralFullDataLoader, which can handle empty dataset now. --- recbole/data/dataloader/general_dataloader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/recbole/data/dataloader/general_dataloader.py b/recbole/data/dataloader/general_dataloader.py index b80feca5f..922f691cc 100644 --- a/recbole/data/dataloader/general_dataloader.py +++ b/recbole/data/dataloader/general_dataloader.py @@ -220,12 +220,11 @@ def __init__(self, config, dataset, sampler, neg_sample_args, dataset.sort(by=uid_field, ascending=True) last_uid = None - positive_item = None + positive_item = set() uid2used_item = sampler.used_ids for uid, iid in zip(dataset.inter_feat[uid_field].numpy(), dataset.inter_feat[iid_field].numpy()): if uid != last_uid: - if last_uid is not None: - self._set_user_property(last_uid, uid2used_item[last_uid], positive_item) + self._set_user_property(last_uid, uid2used_item[last_uid], positive_item) last_uid = uid self.uid_list.append(uid) positive_item = set() @@ -238,6 +237,8 @@ def __init__(self, config, dataset, sampler, neg_sample_args, batch_size=batch_size, dl_format=dl_format, shuffle=shuffle) def _set_user_property(self, uid, used_item, positive_item): + if uid is None: + return history_item = used_item - positive_item positive_item_num = len(positive_item) self.uid2items_num[uid] = positive_item_num From dd157a500251dcffaeb2e4647bc52b5db90c41e9 Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Fri, 18 Dec 2020 12:42:08 +0800 Subject: [PATCH 03/22] FIX: can't raise error in IndividualEvaluator --- recbole/evaluator/abstract_evaluator.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/recbole/evaluator/abstract_evaluator.py b/recbole/evaluator/abstract_evaluator.py index 2570fe8ad..1ab81d476 100644 --- a/recbole/evaluator/abstract_evaluator.py +++ b/recbole/evaluator/abstract_evaluator.py @@ -4,7 +4,7 @@ # @email : tsotfsk@outlook.com # UPDATE -# @Time : 2020/10/21, 2020/12/9 +# @Time : 2020/10/21, 2020/12/18 # @Author : Kaiyuan Li, Zhichao Feng # @email : tsotfsk@outlook.com, fzcbupt@gmail.com @@ -99,7 +99,7 @@ class IndividualEvaluator(BaseEvaluator): """ def __init__(self, config, metrics): super().__init__(config, metrics) - pass + self._check_args() def sample_collect(self, true_scores, pred_scores): """It is called when evaluation sample distribution is `uniform` or `popularity`. 
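The second hunk of this patch, just below, turns the silent `pass` into a constructor-time check. As a minimal self-contained sketch of the fail-fast pattern it introduces (only the `full` attribute and the error message come from the patch; the config handling is illustrative, not RecBole's API):

class IndividualEvaluator:
    def __init__(self, config):
        self.full = config.get('full', False)  # hypothetical config key
        self._check_args()

    def _check_args(self):
        # validate at construction time so a bad eval setting fails immediately,
        # not deep inside an evaluation run
        if self.full:
            raise NotImplementedError("full sort can't use IndividualEvaluator")

try:
    IndividualEvaluator({'full': True})
except NotImplementedError as err:
    print(err)  # full sort can't use IndividualEvaluator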
@@ -127,3 +127,7 @@ def get_score_matrix(self, true_scores, pred_scores): scores_matrix = self.sample_collect(true_scores, pred_scores) return scores_matrix + + def _check_args(self): + if self.full: + raise NotImplementedError('full sort can\'t use IndividualEvaluator') \ No newline at end of file From 9ced71846479f4911ca2afe8c002a02c768b1ca7 Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Fri, 18 Dec 2020 12:42:55 +0800 Subject: [PATCH 04/22] FIX: metrics disorder --- recbole/evaluator/proxy_evaluator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recbole/evaluator/proxy_evaluator.py b/recbole/evaluator/proxy_evaluator.py index fc91f5d48..9f181c4aa 100644 --- a/recbole/evaluator/proxy_evaluator.py +++ b/recbole/evaluator/proxy_evaluator.py @@ -32,9 +32,9 @@ def build(self): """ evaluator_list = [] - metrics_set = {metric.lower() for metric in self.metrics} + metrics_list = [metric.lower() for metric in self.metrics] for metrics, evaluator in metric_eval_bind: - used_metrics = list(metrics_set.intersection(set(metrics.keys()))) + used_metrics = [metric for metric in metrics_list if metric in metrics.keys()] if used_metrics: evaluator_list.append(evaluator(self.config, used_metrics)) return evaluator_list From c7cbd346a49a80b2f750d35aa3c3eb7cf730afd5 Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Fri, 18 Dec 2020 12:46:16 +0800 Subject: [PATCH 05/22] FIX: GAUC calculation error --- recbole/evaluator/evaluators.py | 53 ++++++++++++++++++++++----------- recbole/evaluator/metrics.py | 35 +++++++++++++++++----- 2 files changed, 63 insertions(+), 25 deletions(-) diff --git a/recbole/evaluator/evaluators.py b/recbole/evaluator/evaluators.py index 10a41f715..a97fffa7e 100644 --- a/recbole/evaluator/evaluators.py +++ b/recbole/evaluator/evaluators.py @@ -11,7 +11,7 @@ import torch import numpy as np -from collections import ChainMap +from collections import ChainMap, Counter from recbole.evaluator.metrics import metrics_dict from recbole.evaluator.abstract_evaluator import GroupedEvalautor, IndividualEvaluator @@ -158,22 +158,35 @@ def get_user_pos_len_list(self, interaction, scores_tensor): user_len_list = interaction.user_len_list return pos_len_list, user_len_list - def get_pos_index(self, scores_tensor, pos_len_list, user_len_list): - """get the index of positive items + def rankdata(self, scores): + """Get the ranking of an ordered tensor, and take the average of the ranking for positions with equal values. 
- Args: - scores_tensor (tensor): the tensor of model output with size of `(N, )` - pos_len_list(list): number of positive items - user_len_list(list): number of all items + Args: + scores(tensor): an ordered tensor, with size of `(N, )` - Returns: - tensor: a matrix indicating whether the corresponding item is positive + Examples:: - """ - scores_matrix = self.get_score_matrix(scores_tensor, user_len_list) - _, n_index = torch.sort(scores_matrix, dim=-1, descending=True) - pos_index = (n_index < pos_len_list.reshape(-1, 1)) - return pos_index + >>> rankdata(tensor([[1,2,2,2,3,3,6],[2,2,2,2,4,4,5]])) + tensor([[1.0000, 3.0000, 3.0000, 3.0000, 5.5000, 5.5000, 7.0000], + [2.5000, 2.5000, 2.5000, 2.5000, 5.5000, 5.5000, 7.0000]]) + + """ + length, width = scores.shape + device = scores.device + true_tensor = torch.full((length, 1), True, dtype=np.bool, device=device) + + obs = torch.cat([true_tensor, scores[:, 1:] != scores[:, :-1]], dim=1) + # bias added to dense + bias = torch.arange(0, length, device=device).repeat(width).reshape(width, -1). \ + transpose(1, 0).reshape(-1) + dense = obs.view(-1).cumsum(0) + bias + + # cumulative counts of each unique value + count = torch.where(torch.cat([obs, true_tensor], dim=1))[1] + # get averange rank + avg_rank = .5 * (count[dense] + count[dense - 1] + 1).view(length, -1) + + return avg_rank def collect(self, interaction, scores_tensor): """collect the rank intermediate result of one batch, this function mainly implements ranking @@ -185,10 +198,16 @@ """ pos_len_list, user_len_list = self.get_user_pos_len_list(interaction, scores_tensor) - pos_index = self.get_pos_index(scores_tensor, pos_len_list, user_len_list) - index_list = torch.arange(1, pos_index.shape[1] + 1).to(pos_index.device) - pos_rank_sum = torch.where(pos_index, index_list, torch.zeros_like(index_list)). \ + scores_matrix = self.get_score_matrix(scores_tensor, user_len_list) + desc_scores, desc_index = torch.sort(scores_matrix, dim=-1, descending=True) + + # get the index of positive items in the ranking list + pos_index = (desc_index < pos_len_list.reshape(-1, 1)) + + avg_rank = self.rankdata(desc_scores) + pos_rank_sum = torch.where(pos_index, avg_rank, torch.zeros_like(avg_rank)). \ sum(axis=-1).reshape(-1, 1) + return pos_rank_sum def evaluate(self, batch_matrix_list, eval_data): diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index cb31e1456..df6854257 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -4,7 +4,7 @@ # @email : tsotfsk@outlook.com # UPDATE -# @Time : 2020/08/12, 2020/12/9, 2020/9/16 +# @Time : 2020/08/12, 2020/12/18, 2020/9/16 # @Author : Kaiyuan Li, Zhichao Feng, Xingyu Pan # @email : tsotfsk@outlook.com, fzcbupt@gmail.com, panxy@ruc.edu.cn @@ -23,7 +23,6 @@ # TopK Metrics # - def hit_(pos_index, pos_len): r"""Hit_ (also known as hit ratio at :math:`N`) is a way of calculating how many 'hits' you have in an n-sized list of ranked items. @@ -129,7 +128,6 @@ def ndcg_(pos_index, pos_len): :math:`U^{te}` is for all users in the test set.
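For reference, the `rankdata` helper added above (renamed to `average_rank` later in this series) can be exercised standalone. Below is a sketch using `torch.bool`, the dtype a later patch in this series switches to, with device handling dropped for brevity; on the docstring's input, every group of tied scores shares one average rank:

import torch

def average_rank(scores):
    length, width = scores.shape
    true_tensor = torch.full((length, 1), True, dtype=torch.bool)
    # flag the first element of every run of equal values
    obs = torch.cat([true_tensor, scores[:, 1:] != scores[:, :-1]], dim=1)
    # per-row offset so each row reads its own segment of `count`
    bias = torch.arange(0, length).repeat(width).reshape(width, -1).transpose(1, 0).reshape(-1)
    dense = obs.view(-1).cumsum(0) + bias
    # column indices of the boundaries of each run of equal values
    count = torch.where(torch.cat([obs, true_tensor], dim=1))[1]
    # a run spanning columns [s, e) holds ranks s+1..e, whose mean is (s + e + 1) / 2
    return .5 * (count[dense] + count[dense - 1] + 1).view(length, -1)

print(average_rank(torch.tensor([[1., 2., 2., 2., 3., 3., 6.],
                                 [2., 2., 2., 2., 4., 4., 5.]])))
# tensor([[1.0000, 3.0000, 3.0000, 3.0000, 5.5000, 5.5000, 7.0000],
#         [2.5000, 2.5000, 2.5000, 2.5000, 5.5000, 5.5000, 7.0000]])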
""" - len_rank = np.full_like(pos_len, pos_index.shape[1]) idcg_len = np.where(pos_len > len_rank, len_rank, pos_len) @@ -166,10 +164,31 @@ def precision_(pos_index, pos_len): def gauc_(user_len_list, pos_len_list, pos_rank_sum): - frac = user_len_list - (pos_len_list - 1) / 2 - (1 / pos_len_list) * np.squeeze(pos_rank_sum) - neg_item_num = user_len_list - pos_len_list - user_auc = frac / neg_item_num + r"""GAUC_ (also known as Group Area Under Curve) is used to evaluate the two-class model, referring to + the area under the ROC curve grouped by user. + + .. _GAUC: https://dl.acm.org/doi/10.1145/3219819.3219823 + + Note: + It calculates the AUC score of each user, and finally obtains GAUC by weighting the user AUC + . It is also not limited to k. + + .. math:: + \mathrm {GAUC} = \frac {{{M} \times {(M+N+1)} - \frac{M \times (M+1)}{2}} - + \sum\limits_{i=1}^M rank_{i}} {{M} \times {N}} + + :math:`M` is the number of positive samples. + :math:`N` is the number of negative samples. + :math:`rank_i` is the descending rank of the ith positive sample. + Note: Due to our padding for `scores_tensor` in `RankEvaluator` with `-np.inf`, the padding value will influence + the ranks of origin items. Therefore, we use descending sort here and make an identity transformation to the + formula of `AUC`, which is shown in `auc_` function. For readability, we didn't do simplification in the code. + + """ + pair_num = (user_len_list + 1) * pos_len_list - pos_len_list * (pos_len_list + 1) / 2 - np.squeeze(pos_rank_sum) + neg_item_num = user_len_list - pos_len_list + user_auc = pair_num / (neg_item_num * pos_len_list) result = (user_auc * pos_len_list).sum() / pos_len_list.sum() return result @@ -188,11 +207,11 @@ def auc_(trues, preds): .. math:: \mathrm {AUC} = \frac{\sum\limits_{i=1}^M rank_{i} - - {{M} \times {(M+1)}}} {{M} \times {N}} + - \frac {{M} \times {(M+1)}}{2}} {{{M} \times {N}}} :math:`M` is the number of positive samples. :math:`N` is the number of negative samples. - :math:`rank_i` is the rank of the ith positive sample. + :math:`rank_i` is the ascending rank of the ith positive sample. """ fps, tps = _binary_clf_curve(trues, preds) From 5c1c1473afe5a0751fd5e355d7d71ee79824e2b4 Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Fri, 18 Dec 2020 13:45:48 +0800 Subject: [PATCH 06/22] FIX: rename & comment format --- recbole/evaluator/evaluators.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/recbole/evaluator/evaluators.py b/recbole/evaluator/evaluators.py index a97fffa7e..a0713188e 100644 --- a/recbole/evaluator/evaluators.py +++ b/recbole/evaluator/evaluators.py @@ -4,14 +4,14 @@ # @email : tsotfsk@outlook.com # UPDATE -# @Time : 2020/08/04, 2020/08/11, 2020/12/9 +# @Time : 2020/08/04, 2020/08/11, 2020/12/18 # @Author : Kaiyuan Li, Yupeng Hou, Zhichao Feng # @email : tsotfsk@outlook.com, houyupeng@ruc.edu.cn, fzcbupt@gmail.com import torch import numpy as np -from collections import ChainMap, Counter +from collections import ChainMap from recbole.evaluator.metrics import metrics_dict from recbole.evaluator.abstract_evaluator import GroupedEvalautor, IndividualEvaluator @@ -158,22 +158,27 @@ def get_user_pos_len_list(self, interaction, scores_tensor): user_len_list = interaction.user_len_list return pos_len_list, user_len_list - def rankdata(self, scores): + def average_rank(self, scores): """Get the ranking of an ordered tensor, and take the average of the ranking for positions with equal values. 
Args: scores(tensor): an ordered tensor, with size of `(N, )` - Examples:: + Returns: + torch.Tensor: average_rank + + Example: + >>> average_rank(tensor([[1,2,2,2,3,3,6],[2,2,2,2,4,4,5]])) + tensor([[1.0000, 3.0000, 3.0000, 3.0000, 5.5000, 5.5000, 7.0000], + [2.5000, 2.5000, 2.5000, 2.5000, 5.5000, 5.5000, 7.0000]]) - >>> rankdata(tensor([[1,2,2,2,3,3,6],[2,2,2,2,4,4,5]])) - tensor([[1.0000, 3.0000, 3.0000, 3.0000, 5.5000, 5.5000, 7.0000], - [2.5000, 2.5000, 2.5000, 2.5000, 5.5000, 5.5000, 7.0000]]) + Reference: + https://github.com/scipy/scipy/blob/v0.17.1/scipy/stats/stats.py#L5262-L5352 """ length, width = scores.shape device = scores.device - true_tensor = torch.full((length, 1), True, dtype=np.bool, device=device) + true_tensor = torch.full((length, 1), True, dtype=torch.bool, device=device) obs = torch.cat([true_tensor, scores[:, 1:] != scores[:, :-1]], dim=1) # bias added to dense @@ -183,7 +188,7 @@ # cumulative counts of each unique value count = torch.where(torch.cat([obs, true_tensor], dim=1))[1] - # get averange rank + # get average rank avg_rank = .5 * (count[dense] + count[dense - 1] + 1).view(length, -1) return avg_rank @@ -204,7 +209,7 @@ # get the index of positive items in the ranking list pos_index = (desc_index < pos_len_list.reshape(-1, 1)) - avg_rank = self.rankdata(desc_scores) + avg_rank = self.average_rank(desc_scores) pos_rank_sum = torch.where(pos_index, avg_rank, torch.zeros_like(avg_rank)). \ sum(axis=-1).reshape(-1, 1) From 2dcac2869a3fbf21bef41fe8b565fa725b85c32b Mon Sep 17 00:00:00 2001 From: chenyushuo <297086016@qq.com> Date: Fri, 18 Dec 2020 21:07:46 +0800 Subject: [PATCH 07/22] REVERT: revert modifications in data.utils --- recbole/data/utils.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/recbole/data/utils.py b/recbole/data/utils.py index f9c4ed06a..2eda471e9 100644 --- a/recbole/data/utils.py +++ b/recbole/data/utils.py @@ -86,15 +86,8 @@ def data_preparation(config, dataset, save=False): raise ValueError('Sequential models require "loo" split strategy.') builded_datasets = dataset.build(es) - if len(builded_datasets) not in {2, 3}: - raise ValueError('Dataset should only be divided into two or three parts.') - else: - train_dataset = builded_datasets[0] - evaluation_datasets = builded_datasets[1:] - if len(builded_datasets) == 2: - phases = ['train', 'test'] - else: - phases = ['train', 'valid', 'test'] + train_dataset, valid_dataset, test_dataset = builded_datasets + phases = ['train', 'valid', 'test'] if save: save_datasets(config['checkpoint_dir'], name=phases, dataset=builded_datasets) @@ -107,7 +100,7 @@ def data_preparation(config, dataset, save=False): sampler = Sampler(phases, builded_datasets, es.neg_sample_args['distribution']) else: sampler = RepeatableSampler(phases, dataset, es.neg_sample_args['distribution']) - kwargs['sampler'] = sampler.set_phase(phases[0]) + kwargs['sampler'] = sampler.set_phase('train') kwargs['neg_sample_args'] = copy.deepcopy(es.neg_sample_args) if model_type == ModelType.KNOWLEDGE: kg_sampler = KGSampler(dataset, es.neg_sample_args['distribution']) @@ -129,21 +122,18 @@ getattr(es, es_str[1])() if 'sampler' not in locals(): sampler = Sampler(phases, builded_datasets, es.neg_sample_args['distribution']) - 
sampler.set_distribution(es.neg_sample_args['distribution']) + kwargs['sampler'] = [sampler.set_phase('valid'), sampler.set_phase('test')] kwargs['neg_sample_args'] = copy.deepcopy(es.neg_sample_args) - evaluation_data = dataloader_construct( + valid_data, test_data = dataloader_construct( name='evaluation', config=config, eval_setting=es, - dataset=evaluation_datasets, + dataset=[valid_dataset, test_dataset], batch_size=config['eval_batch_size'], **kwargs ) - if len(builded_datasets) == 2: - return train_data, None, evaluation_data - else: - return [train_data] + evaluation_data + return train_data, valid_data, test_data def dataloader_construct(name, config, eval_setting, dataset, From fd86870a86a18b11efc02072263516a22e90d07e Mon Sep 17 00:00:00 2001 From: fzc <970955517@qq.com> Date: Sat, 19 Dec 2020 12:27:12 +0800 Subject: [PATCH 08/22] update notes --- recbole/evaluator/metrics.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index df6854257..b2bcaded1 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -170,8 +170,11 @@ def gauc_(user_len_list, pos_len_list, pos_rank_sum): .. _GAUC: https://dl.acm.org/doi/10.1145/3219819.3219823 Note: - It calculates the AUC score of each user, and finally obtains GAUC by weighting the user AUC - . It is also not limited to k. + It calculates the AUC score of each user, and finally obtains GAUC by weighting the user AUC. + It is also not limited to k. Due to our padding for `scores_tensor` in `RankEvaluator` with + `-np.inf`, the padding value will influence the ranks of origin items. Therefore, we use + descending sort here and make an identity transformation to the formula of `AUC`, which is + shown in `auc_` function. For readability, we didn't do simplification in the code. .. math:: \mathrm {GAUC} = \frac {{{M} \times {(M+N+1)} - \frac{M \times (M+1)}{2}} - \sum\limits_{i=1}^M rank_{i}} {{M} \times {N}} :math:`M` is the number of positive samples. :math:`N` is the number of negative samples. :math:`rank_i` is the descending rank of the ith positive sample. - Note: Due to our padding for `scores_tensor` in `RankEvaluator` with `-np.inf`, the padding value will influence - the ranks of origin items. Therefore, we use descending sort here and make an identity transformation to the - formula of `AUC`, which is shown in `auc_` function. For readability, we didn't do simplification in the code.
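Spelled out, the identity transformation that the note refers to is a single substitution: the ascending and descending average ranks of the :math:`i`-th positive item in a list of length :math:`M+N` satisfy :math:`rank_i^{asc} = (M+N+1) - rank_i^{desc}`, so

.. math::
    \mathrm{AUC} = \frac{\sum\limits_{i=1}^M rank_i^{asc} - \frac{M \times (M+1)}{2}}{M \times N}
                 = \frac{M \times (M+N+1) - \frac{M \times (M+1)}{2} - \sum\limits_{i=1}^M rank_i^{desc}}{M \times N}

which is exactly the per-user quantity that the `pair_num / (neg_item_num * pos_len_list)` expression in the surrounding code computes.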
- """ pair_num = (user_len_list + 1) * pos_len_list - pos_len_list * (pos_len_list + 1) / 2 - np.squeeze(pos_rank_sum) neg_item_num = user_len_list - pos_len_list From e84aeb79af3ab5be3147768cb4fefad22e27cbcb Mon Sep 17 00:00:00 2001 From: chenyushuo <297086016@qq.com> Date: Sun, 20 Dec 2020 21:36:00 +0800 Subject: [PATCH 09/22] FEA: Increased the robustness of trainer.evaluate --- recbole/trainer/trainer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py index 1a3ef791a..b033faa18 100644 --- a/recbole/trainer/trainer.py +++ b/recbole/trainer/trainer.py @@ -324,6 +324,9 @@ def evaluate(self, eval_data, load_best_model=True, model_file=None): Returns: dict: eval result, key is the eval metric and value in the corresponding metric value """ + if not eval_data: + return + if load_best_model: if model_file: checkpoint_file = model_file From 50bf9e808203b6b4c99c2ab87a956958667cd7f3 Mon Sep 17 00:00:00 2001 From: chenyushuo <297086016@qq.com> Date: Sun, 20 Dec 2020 21:53:27 +0800 Subject: [PATCH 10/22] FIX: bug fix in GeneralFullDataLoader. --- recbole/data/dataloader/general_dataloader.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/recbole/data/dataloader/general_dataloader.py b/recbole/data/dataloader/general_dataloader.py index 922f691cc..cf75341c3 100644 --- a/recbole/data/dataloader/general_dataloader.py +++ b/recbole/data/dataloader/general_dataloader.py @@ -261,17 +261,16 @@ def _shuffle(self): self.logger.warnning('GeneralFullDataLoader can\'t shuffle') def _next_batch_data(self): - index = slice(self.pr, self.pr + self.step) - user_df = self.user_df[index] - pos_len_list = self.uid2items_num[self.uid_list[index]] - user_len_list = np.full(len(user_df), self.item_num) - user_df.set_additional_info(pos_len_list, user_len_list) + user_df = self.user_df[self.pr: self.pr + self.step] cur_data = self._neg_sampling(user_df) self.pr += self.step return cur_data def _neg_sampling(self, user_df): uid_list = list(user_df[self.dataset.uid_field]) + pos_len_list = self.uid2items_num[uid_list] + user_len_list = np.full(len(uid_list), self.item_num) + user_df.set_additional_info(pos_len_list, user_len_list) history_item = self.uid2history_item[uid_list] history_row = torch.cat([torch.full_like(hist_iid, i) for i, hist_iid in enumerate(history_item)]) From 4b4b9a81b3866c9adae1699da80d39cc37dbb4bd Mon Sep 17 00:00:00 2001 From: shanlei <2015201909@ruc.edu.cn> Date: Mon, 21 Dec 2020 09:55:40 +0800 Subject: [PATCH 11/22] FIX: optimize update_attentive_A function in KGAT --- recbole/model/knowledge_aware_recommender/kgat.py | 2 +- recbole/trainer/trainer.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/recbole/model/knowledge_aware_recommender/kgat.py b/recbole/model/knowledge_aware_recommender/kgat.py index 13dd1a01e..19fbe9879 100644 --- a/recbole/model/knowledge_aware_recommender/kgat.py +++ b/recbole/model/knowledge_aware_recommender/kgat.py @@ -268,7 +268,7 @@ def update_attentive_A(self): # Current PyTorch version does not support softmax on SparseCUDA, temporarily move to CPU to calculate softmax A_in = torch.sparse.FloatTensor(indices, kg_score, self.matrix_size).cpu() A_in = torch.sparse.softmax(A_in, dim=1).to(self.device) - self.A_in = copy.copy(A_in) + self.A_in = A_in def predict(self, interaction): user = interaction[self.USER_ID] diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py index b033faa18..365503d2a 100644 --- a/recbole/trainer/trainer.py +++ 
b/recbole/trainer/trainer.py @@ -444,7 +444,9 @@ def _train_epoch(self, train_data, epoch_idx, loss_func=None): kg_total_loss = super()._train_epoch(train_data, epoch_idx, self.model.calculate_kg_loss) # update A - self.model.update_attentive_A() + self.model.eval() + with torch.no_grad(): + self.model.update_attentive_A() return rs_total_loss, kg_total_loss From eb84ef352cd9384d7aa4b7f0bea73f545eff8eb7 Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Mon, 21 Dec 2020 18:28:07 +0800 Subject: [PATCH 16/22] FEA: add parameters check in gauc --- recbole/evaluator/evaluators.py | 1 + recbole/evaluator/metrics.py | 37 ++++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/recbole/evaluator/evaluators.py b/recbole/evaluator/evaluators.py index a0713188e..d20d73307 100644 --- a/recbole/evaluator/evaluators.py +++ b/recbole/evaluator/evaluators.py @@ -229,6 +229,7 @@ def evaluate(self, batch_matrix_list, eval_data): pos_len_list = eval_data.get_pos_len_list() user_len_list = eval_data.get_user_len_list() pos_rank_sum = torch.cat(batch_matrix_list, dim=0).cpu().numpy() + assert len(pos_len_list) == len(pos_rank_sum) # get metrics metric_dict = {} diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index df6854257..4e1ce123b 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -170,8 +170,11 @@ def gauc_(user_len_list, pos_len_list, pos_rank_sum): .. _GAUC: https://dl.acm.org/doi/10.1145/3219819.3219823 Note: - It calculates the AUC score of each user, and finally obtains GAUC by weighting the user AUC - . It is also not limited to k. + It calculates the AUC score of each user, and finally obtains GAUC by weighting the user AUC. + It is also not limited to k. Due to our padding for `scores_tensor` in `RankEvaluator` with + `-np.inf`, the padding value will influence the ranks of origin items. Therefore, we use + descending sort here and make an identity transformation to the formula of `AUC`, which is + shown in `auc_` function. For readability, we didn't do simplification in the code. .. math:: \mathrm {GAUC} = \frac {{{M} \times {(M+N+1)} - \frac{M \times (M+1)}{2}} - \sum\limits_{i=1}^M rank_{i}} {{M} \times {N}} :math:`M` is the number of positive samples. :math:`N` is the number of negative samples.
:math:`rank_i` is the descending rank of the ith positive sample. - Note: Due to our padding for `scores_tensor` in `RankEvaluator` with `-np.inf`, the padding value will influence - the ranks of origin items. Therefore, we use descending sort here and make an identity transformation to the - formula of `AUC`, which is shown in `auc_` function. For readability, we didn't do simplification in the code. - """ + neg_len_list = user_len_list - pos_len_list + + # check positive and negative samples + all_with_pos = np.any(pos_len_list == 0) + all_with_neg = np.any(neg_len_list == 0) + non_zero_idx = np.full(len(user_len_list), True, dtype=np.bool) + if all_with_pos: + logger = getLogger() + logger.warning("No positive samples in some users, " + "true positive value should be meaningless, " + "these users have been removed from GAUC calculation") + non_zero_idx *= (pos_len_list != 0) + if all_with_neg: + logger = getLogger() + logger.warning("No negative samples in some users, " + "false positive value should be meaningless, " + "these users have been removed from GAUC calculation") + non_zero_idx *= (neg_len_list != 0) + if all_with_pos or all_with_neg: + user_len_list = user_len_list[non_zero_idx] + neg_len_list = user_len_list[non_zero_idx] + pos_rank_sum = pos_rank_sum[non_zero_idx] + pair_num = (user_len_list + 1) * pos_len_list - pos_len_list * (pos_len_list + 1) / 2 - np.squeeze(pos_rank_sum) - neg_item_num = user_len_list - pos_len_list - user_auc = pair_num / (neg_item_num * pos_len_list) + user_auc = pair_num / (neg_len_list * pos_len_list) result = (user_auc * pos_len_list).sum() / pos_len_list.sum() return result From aed05ee6e9c41493b24a368b9328e1b9a6fa1b58 Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Mon, 21 Dec 2020 21:54:30 +0800 Subject: [PATCH 17/22] FEA: add GAUC check & GAUC test --- recbole/evaluator/evaluators.py | 2 ++ recbole/evaluator/metrics.py | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/recbole/evaluator/evaluators.py b/recbole/evaluator/evaluators.py index d20d73307..c7a6e9636 100644 --- a/recbole/evaluator/evaluators.py +++ b/recbole/evaluator/evaluators.py @@ -116,6 +116,8 @@ def _calculate_metrics(self, pos_len_list, topk_index): metric_fuc = metrics_dict[metric.lower()] result = metric_fuc(pos_idx_matrix, pos_len_list) result_list.append(result) # n_users x len(metrics) x len(ranks) + import pdb; + pdb.set_trace() result = np.stack(result_list, axis=0).mean(axis=1) # len(metrics) x len(ranks) return result diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index 4e1ce123b..2e875cead 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -4,7 +4,7 @@ # @email : tsotfsk@outlook.com # UPDATE -# @Time : 2020/08/12, 2020/12/18, 2020/9/16 +# @Time : 2020/08/12, 2020/12/21, 2020/9/16 # @Author : Kaiyuan Li, Zhichao Feng, Xingyu Pan # @email : tsotfsk@outlook.com, fzcbupt@gmail.com, panxy@ruc.edu.cn @@ -204,9 +204,9 @@ def gauc_(user_len_list, pos_len_list, pos_rank_sum): "these users have been removed from GAUC calculation") non_zero_idx *= (neg_len_list != 0) if all_with_pos or all_with_neg: - user_len_list = user_len_list[non_zero_idx] - neg_len_list = user_len_list[non_zero_idx] - pos_rank_sum = pos_rank_sum[non_zero_idx] + item_list = user_len_list, neg_len_list, pos_len_list, pos_rank_sum + user_len_list, neg_len_list, pos_len_list, pos_rank_sum = \ + map(lambda x: x[non_zero_idx], item_list) pair_num = (user_len_list + 1) * pos_len_list - pos_len_list * (pos_len_list + 
1) / 2 - np.squeeze(pos_rank_sum) user_auc = pair_num / (neg_len_list * pos_len_list) From d6ea8e27902936626ca7be048c129501cc18e33c Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Tue, 22 Dec 2020 11:09:21 +0800 Subject: [PATCH 18/22] update metrics.py --- recbole/evaluator/metrics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index 2e875cead..470b307d5 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -211,6 +211,7 @@ def gauc_(user_len_list, pos_len_list, pos_rank_sum): pair_num = (user_len_list + 1) * pos_len_list - pos_len_list * (pos_len_list + 1) / 2 - np.squeeze(pos_rank_sum) user_auc = pair_num / (neg_len_list * pos_len_list) result = (user_auc * pos_len_list).sum() / pos_len_list.sum() + return result From 36208416cd4e9722fc0382e8ccea53218b5b0807 Mon Sep 17 00:00:00 2001 From: fzc <970955517@qq.com> Date: Tue, 22 Dec 2020 11:17:39 +0800 Subject: [PATCH 19/22] Update evaluators.py --- recbole/evaluator/evaluators.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/recbole/evaluator/evaluators.py b/recbole/evaluator/evaluators.py index c7a6e9636..d20d73307 100644 --- a/recbole/evaluator/evaluators.py +++ b/recbole/evaluator/evaluators.py @@ -116,8 +116,6 @@ def _calculate_metrics(self, pos_len_list, topk_index): metric_fuc = metrics_dict[metric.lower()] result = metric_fuc(pos_idx_matrix, pos_len_list) result_list.append(result) # n_users x len(metrics) x len(ranks) - import pdb; - pdb.set_trace() result = np.stack(result_list, axis=0).mean(axis=1) # len(metrics) x len(ranks) return result From bd40a3ad47b4f32227e72474b2731515813018c8 Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Tue, 22 Dec 2020 11:21:01 +0800 Subject: [PATCH 20/22] FEA: add ranking metric test --- tests/metrics/test_rank_metrics.py | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/metrics/test_rank_metrics.py diff --git a/tests/metrics/test_rank_metrics.py b/tests/metrics/test_rank_metrics.py new file mode 100644 index 000000000..8bde0d9d1 --- /dev/null +++ b/tests/metrics/test_rank_metrics.py @@ -0,0 +1,44 @@ +# -*- encoding: utf-8 -*- +# @Time : 2020/12/21 +# @Author : Zhichao Feng +# @email : fzcbupt@gmail.com + + +import os +import sys +import unittest + +sys.path.append(os.getcwd()) +import numpy as np +from recbole.evaluator.metrics import metrics_dict + + +class TestCases(object): + user_len_list0 = np.array([2, 3, 5]) + pos_len_list0 = np.array([1, 2, 3]) + pos_rank_sum0 = np.array([1, 4, 9]) + + user_len_list1 = np.array([3, 6, 4]) + pos_len_list1 = np.array([1, 0, 4]) + pos_rank_sum1 = np.array([3, 0, 6]) + + +def get_result(name, case=0): + func = metrics_dict[name] + return func(getattr(TestCases, f'user_len_list{case}'), + getattr(TestCases, f'pos_len_list{case}'), + getattr(TestCases, f'pos_rank_sum{case}')) + + +class TestRankMetrics(unittest.TestCase): + def test_gauc(self): + name = 'gauc' + self.assertEqual(get_result(name, case=0), (1 * ((2 - (1 - 1) / 2 - 1 / 1) / (2 - 1)) + + 2 * ((3 - (2 - 1) / 2 - 4 / 2) / (3 - 2)) + + 3 * ((5 - (3 - 1) / 2 - 9 / 3) / (5 - 3))) + / (1 + 2 + 3)) + self.assertEqual(get_result(name, case=1), (3 - 0 - 3 / 1) / (3 - 1)) + + +if __name__ == "__main__": + unittest.main() From 6e9a9c6e042adde4cfc35d419b867319cbf54f03 Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Tue, 22 Dec 2020 11:41:35 +0800 Subject: [PATCH 21/22] FIX: rename bool variable in GAUC & remove keys in build 
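The rename matters for readability: the flags are true when *any* user lacks positive or negative samples, and those users must be masked out before the per-user AUC division. A self-contained sketch of that masking step, reusing the case-1 numbers from the test file added above (the second user has no positives, the third no negatives):

import numpy as np

user_len_list = np.array([3, 6, 4])
pos_len_list = np.array([1, 0, 4])
neg_len_list = user_len_list - pos_len_list

any_without_pos = np.any(pos_len_list == 0)  # True: user 2 has no positives
any_without_neg = np.any(neg_len_list == 0)  # True: user 3 has no negatives
non_zero_idx = (pos_len_list != 0) & (neg_len_list != 0)
print(non_zero_idx)  # [ True False False]

The same patch also drops the redundant `.keys()` in `proxy_evaluator.py`: `metric in metrics` already tests key membership on a dict.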
--- recbole/evaluator/metrics.py | 10 +++++----- recbole/evaluator/proxy_evaluator.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index 470b307d5..483fd3f21 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -188,22 +188,22 @@ def gauc_(user_len_list, pos_len_list, pos_rank_sum): neg_len_list = user_len_list - pos_len_list # check positive and negative samples - all_with_pos = np.any(pos_len_list == 0) - all_with_neg = np.any(neg_len_list == 0) + any_without_pos = np.any(pos_len_list == 0) + any_without_neg = np.any(neg_len_list == 0) non_zero_idx = np.full(len(user_len_list), True, dtype=np.bool) - if all_with_pos: + if any_without_pos: logger = getLogger() logger.warning("No positive samples in some users, " "true positive value should be meaningless, " "these users have been removed from GAUC calculation") non_zero_idx *= (pos_len_list != 0) - if all_with_neg: + if any_without_neg: logger = getLogger() logger.warning("No negative samples in some users, " "false positive value should be meaningless, " "these users have been removed from GAUC calculation") non_zero_idx *= (neg_len_list != 0) - if all_with_pos or all_with_neg: + if any_without_pos or any_without_neg: item_list = user_len_list, neg_len_list, pos_len_list, pos_rank_sum user_len_list, neg_len_list, pos_len_list, pos_rank_sum = \ map(lambda x: x[non_zero_idx], item_list) diff --git a/recbole/evaluator/proxy_evaluator.py b/recbole/evaluator/proxy_evaluator.py index 9f181c4aa..f5faaebd6 100644 --- a/recbole/evaluator/proxy_evaluator.py +++ b/recbole/evaluator/proxy_evaluator.py @@ -34,7 +34,7 @@ def build(self): evaluator_list = [] metrics_list = [metric.lower() for metric in self.metrics] for metrics, evaluator in metric_eval_bind: - used_metrics = [metric for metric in metrics_list if metric in metrics.keys()] + used_metrics = [metric for metric in metrics_list if metric in metrics] if used_metrics: evaluator_list.append(evaluator(self.config, used_metrics)) return evaluator_list From 634bad684e0578e781f38acc443119be43bf1268 Mon Sep 17 00:00:00 2001 From: guijiql <970955517@qq.com> Date: Thu, 24 Dec 2020 11:44:09 +0800 Subject: [PATCH 22/22] FEA: add RankEvaluator collect test --- tests/metrics/test_rank_metrics.py | 48 ++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/tests/metrics/test_rank_metrics.py b/tests/metrics/test_rank_metrics.py index 8bde0d9d1..c60a2c485 100644 --- a/tests/metrics/test_rank_metrics.py +++ b/tests/metrics/test_rank_metrics.py @@ -10,10 +10,19 @@ sys.path.append(os.getcwd()) import numpy as np +import torch +from recbole.config import Config +from recbole.data.interaction import Interaction from recbole.evaluator.metrics import metrics_dict +from recbole.evaluator.evaluators import RankEvaluator +parameters_dict = { + 'model': 'BPR', + 'eval_setting': 'RO_RS,uni100', +} -class TestCases(object): + +class MetricsTestCases(object): user_len_list0 = np.array([2, 3, 5]) pos_len_list0 = np.array([1, 2, 3]) pos_rank_sum0 = np.array([1, 4, 9]) @@ -23,21 +32,42 @@ class TestCases(object): pos_rank_sum1 = np.array([3, 0, 6]) -def get_result(name, case=0): +class CollectTestCases(object): + interaction0 = Interaction({}, [0, 2, 3, 4], [2, 3, 4, 5]) + scores_tensor0 = torch.Tensor([0.1, 0.2, + 0.1, 0.1, 0.2, + 0.2, 0.2, 0.2, 0.2, + 0.3, 0.2, 0.1, 0.4, 0.3]) + + +def get_metric_result(name, case=0): func = metrics_dict[name] - return func(getattr(TestCases, 
f'user_len_list{case}'), + getattr(MetricsTestCases, f'pos_len_list{case}'), + getattr(MetricsTestCases, f'pos_rank_sum{case}')) + + +def get_collect_result(evaluator, case=0): + func = evaluator.collect + return func(getattr(CollectTestCases, f'interaction{case}'), + getattr(CollectTestCases, f'scores_tensor{case}')) class TestRankMetrics(unittest.TestCase): def test_gauc(self): name = 'gauc' - self.assertEqual(get_result(name, case=0), (1 * ((2 - (1 - 1) / 2 - 1 / 1) / (2 - 1)) + - 2 * ((3 - (2 - 1) / 2 - 4 / 2) / (3 - 2)) + - 3 * ((5 - (3 - 1) / 2 - 9 / 3) / (5 - 3))) - / (1 + 2 + 3)) - self.assertEqual(get_result(name, case=1), (3 - 0 - 3 / 1) / (3 - 1)) + self.assertEqual(get_metric_result(name, case=0), (1 * ((2 - (1 - 1) / 2 - 1 / 1) / (2 - 1)) + + 2 * ((3 - (2 - 1) / 2 - 4 / 2) / (3 - 2)) + + 3 * ((5 - (3 - 1) / 2 - 9 / 3) / (5 - 3))) / (1 + 2 + 3)) + self.assertEqual(get_metric_result(name, case=1), (3 - 0 - 3 / 1) / (3 - 1)) + + def test_collect(self): + config = Config('BPR', 'ml-100k', config_dict=parameters_dict) + metrics = ['GAUC'] + rank_evaluator = RankEvaluator(config, metrics) + self.assertEqual(get_collect_result(rank_evaluator, case=0).squeeze().cpu().numpy().tolist(), + np.array([0, (2 + 3) / 2 * 2, (1 + 2 + 3 + 4) / 4 * 3, 1 + (2 + 3) / 2 + 4 + 5]).tolist()) if __name__ == "__main__": unittest.main()
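Beyond the unit tests above, the closed-form arithmetic in `gauc_` can be cross-checked against a per-user reference AUC. The following is a sketch under the assumption that `scipy` and `scikit-learn` are available (neither is required by the patch series itself); it rebuilds `pos_rank_sum` from descending average ranks, the same quantity `RankEvaluator.collect` accumulates, and compares the weighted result with `sklearn.metrics.roc_auc_score`:

import numpy as np
from scipy.stats import rankdata
from sklearn.metrics import roc_auc_score

rng = np.random.default_rng(0)
user_len, pos_len, rank_sums, ref_aucs = [], [], [], []
for _ in range(5):
    n_pos, n_neg = rng.integers(1, 6), rng.integers(1, 6)
    scores = rng.random(n_pos + n_neg)
    labels = np.r_[np.ones(n_pos), np.zeros(n_neg)]  # positives first
    desc_rank = rankdata(-scores)  # descending ranks, ties averaged
    user_len.append(n_pos + n_neg)
    pos_len.append(n_pos)
    rank_sums.append(desc_rank[:n_pos].sum())
    ref_aucs.append(roc_auc_score(labels, scores))

user_len, pos_len = np.array(user_len), np.array(pos_len)
pos_rank_sum = np.array(rank_sums)

# the formula from gauc_: pairs won by positives over all positive-negative pairs
pair_num = (user_len + 1) * pos_len - pos_len * (pos_len + 1) / 2 - pos_rank_sum
user_auc = pair_num / ((user_len - pos_len) * pos_len)
gauc = (user_auc * pos_len).sum() / pos_len.sum()
reference = (np.array(ref_aucs) * pos_len).sum() / pos_len.sum()
assert np.isclose(gauc, reference)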