From 0bbfc3c02d6ae396c77b4e9e71e8a8c0ce444ffd Mon Sep 17 00:00:00 2001 From: chenyushuo <297086016@qq.com> Date: Fri, 25 Dec 2020 16:21:47 +0800 Subject: [PATCH 1/5] FEA: add case_study.py (resolve #506) --- recbole/utils/case_study.py | 73 +++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 recbole/utils/case_study.py diff --git a/recbole/utils/case_study.py b/recbole/utils/case_study.py new file mode 100644 index 000000000..aa9962bf2 --- /dev/null +++ b/recbole/utils/case_study.py @@ -0,0 +1,73 @@ +import numpy as np +import torch + +from recbole.data.dataloader.general_dataloader import GeneralFullDataLoader +from recbole.data.dataloader.sequential_dataloader import SequentialFullDataLoader + + +@torch.no_grad() +def get_scores(uid_series, model, test_data): + """Calculate the scores of all items for each user in uid_series. + + Note: + The score of [pad] and history items will be set into -inf. + + Args: + uid_series (np.ndarray): User id series + model (AbstractRecommender): Model to predict + test_data (AbstractDataLoader): The test_data of model + + Returns: + torch.Tensor: the scores of all items for each user in uid_series. + """ + uid_field = test_data.dataset.uid_field + dataset = test_data.dataset + model.eval() + + if isinstance(test_data, GeneralFullDataLoader): + index = np.isin(test_data.user_df[uid_field].numpy(), uid_series) + input_interaction = test_data.user_df[index] + history_item = test_data.uid2history_item[input_interaction[uid_field]] + history_row = torch.cat([torch.full_like(hist_iid, i) for i, hist_iid in enumerate(history_item)]) + history_col = torch.cat(list(history_item)) + history_index = history_row, history_col + elif isinstance(test_data, SequentialFullDataLoader): + index = np.isin(test_data.uid_list, uid_series) + input_interaction = test_data.augmentation(test_data.item_list_index[index], + test_data.target_index[index], test_data.item_list_length[index]) + history_index = None + else: + raise NotImplementedError + + # Get scores of all items + try: + scores = model.full_sort_predict(input_interaction) + except NotImplementedError: + input_interaction = input_interaction.repeat(dataset.item_num) + input_interaction.update(test_data.get_item_feature().repeat(len(uid_series))) + scores = model.predict(input_interaction) + + scores = scores.view(-1, dataset.item_num) + scores[:, 0] = -np.inf # set scores of [pad] to -inf + if history_index is not None: + scores[history_index] = -np.inf # set scores of history items to -inf + + return scores + + +def get_topk(uid_series, model, test_data, k): + """Calculate the top-k items' scores and ids for each user in uid_series. + + Args: + uid_series (np.ndarray): User id series + model (AbstractRecommender): Model to predict + test_data (AbstractDataLoader): The test_data of model + k (int): The top-k items. + + Returns: + tuple: + - topk_scores (torch.Tensor): The scores of topk items. + - topk_index (torch.Tensor): The index of topk items, which is also the internal ids of items. + """ + scores = get_scores(uid_series, model, test_data) + return torch.topk(scores, k) From e25819cefa9fe5bb535b774f06ff5bcced50303a Mon Sep 17 00:00:00 2001 From: chenyushuo <297086016@qq.com> Date: Fri, 25 Dec 2020 16:37:44 +0800 Subject: [PATCH 2/5] FORMAT: code format in trainer.py --- recbole/trainer/trainer.py | 43 +++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py index 7a6565366..185770444 100644 --- a/recbole/trainer/trainer.py +++ b/recbole/trainer/trainer.py @@ -551,9 +551,10 @@ class xgboostTrainer(AbstractTrainer): """xgboostTrainer is designed for XGBOOST. """ + def __init__(self, config, model): super(xgboostTrainer, self).__init__(config, model) - + self.logger = getLogger() self.label_field = config['LABEL_FIELD'] @@ -605,22 +606,22 @@ def _interaction_to_DMatrix(self, interaction): interaction_np = interaction.numpy() cur_data = np.array([]) for key, value in interaction_np.items(): - value = np.resize(value,(value.shape[0],1)) + value = np.resize(value, (value.shape[0], 1)) if key != self.label_field: if cur_data.shape[0] == 0: cur_data = value else: cur_data = np.hstack((cur_data, value)) - - return xgb.DMatrix(data = cur_data, - label = interaction_np[self.label_field], - weight = self.weight, - base_margin = self.base_margin, - missing = self.missing, - silent = self.silent, - feature_names = self.feature_names, - feature_types = self.feature_types, - nthread = self.nthread) + + return xgb.DMatrix(data=cur_data, + label=interaction_np[self.label_field], + weight=self.weight, + base_margin=self.base_margin, + missing=self.missing, + silent=self.silent, + feature_names=self.feature_names, + feature_types=self.feature_types, + nthread=self.nthread) def _train_at_once(self, train_data, valid_data): r""" @@ -631,11 +632,11 @@ def _train_at_once(self, train_data, valid_data): """ self.dtrain = self._interaction_to_DMatrix(train_data.dataset[:]) self.dvalid = self._interaction_to_DMatrix(valid_data.dataset[:]) - self.evals = [(self.dtrain,'train'),(self.dvalid, 'valid')] - self.model = xgb.train(self.params, self.dtrain, self.num_boost_round, - self.evals, self.obj, self.feval, self.maximize, - self.early_stopping_rounds, self.evals_result, - self.verbose_eval, self.xgb_model, self.callbacks) + self.evals = [(self.dtrain, 'train'), (self.dvalid, 'valid')] + self.model = xgb.train(self.params, self.dtrain, self.num_boost_round, + self.evals, self.obj, self.feval, self.maximize, + self.early_stopping_rounds, self.evals_result, + self.verbose_eval, self.xgb_model, self.callbacks) self.model.save_model(self.saved_model_file) self.xgb_model = self.saved_model_file @@ -645,13 +646,13 @@ def _valid_epoch(self, valid_data): Args: valid_data (XgboostDataLoader): XgboostDataLoader, which is the same with GeneralDataLoader. """ - valid_result = self.evaluate(valid_data) + valid_result = self.evaluate(valid_data) valid_score = calculate_valid_score(valid_result, self.valid_metric) return valid_result, valid_score def fit(self, train_data, valid_data=None, verbose=True, saved=True): # load model - if self.xgb_model != None: + if self.xgb_model is not None: self.model.load_model(self.xgb_model) self.best_valid_score = 0. @@ -666,7 +667,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True): valid_result, valid_score = self._valid_epoch(valid_data) valid_end_time = time() valid_score_output = "epoch %d evaluating [time: %.2fs, valid_score: %f]" % \ - (epoch_idx, valid_end_time - valid_start_time, valid_score) + (epoch_idx, valid_end_time - valid_start_time, valid_score) valid_result_output = 'valid result: \n' + dict2str(valid_result) if verbose: self.logger.info(valid_score_output) @@ -674,7 +675,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True): self.best_valid_score = valid_score self.best_valid_result = valid_result - + return self.best_valid_score, self.best_valid_result def evaluate(self, eval_data, load_best_model=True, model_file=None): From b7d3fd1dc3a659d0cbab397b929abea34a154097 Mon Sep 17 00:00:00 2001 From: chenyushuo <297086016@qq.com> Date: Fri, 25 Dec 2020 17:17:14 +0800 Subject: [PATCH 3/5] DOC: add header to case_study.py --- recbole/utils/case_study.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/recbole/utils/case_study.py b/recbole/utils/case_study.py index aa9962bf2..45d15a2bc 100644 --- a/recbole/utils/case_study.py +++ b/recbole/utils/case_study.py @@ -1,3 +1,13 @@ +# @Time : 2020/12/25 +# @Author : Yushuo Chen +# @Email : chenyushuo@ruc.edu.cn + +# UPDATE +# @Time : 2020/12/25 +# @Author : Yushuo Chen +# @email : chenyushuo@ruc.edu.cn + + import numpy as np import torch From 6d839ee371940f27e8a71676d356bab120de6949 Mon Sep 17 00:00:00 2001 From: chenyushuo <297086016@qq.com> Date: Fri, 25 Dec 2020 17:34:29 +0800 Subject: [PATCH 4/5] FEA: Add progress bar to trainer.py (resolve #516) --- recbole/properties/overall.yaml | 2 + recbole/quick_start/quick_start.py | 5 +- recbole/trainer/trainer.py | 80 ++++++++++++++++++++---------- recbole/utils/argument_list.py | 4 +- 4 files changed, 63 insertions(+), 28 deletions(-) diff --git a/recbole/properties/overall.yaml b/recbole/properties/overall.yaml index e9dd78d57..f0149e399 100644 --- a/recbole/properties/overall.yaml +++ b/recbole/properties/overall.yaml @@ -6,6 +6,7 @@ state: INFO reproducibility: True data_path: 'dataset/' checkpoint_dir: 'saved' +show_progress: True # training settings epochs: 300 @@ -13,6 +14,7 @@ train_batch_size: 2048 learner: adam learning_rate: 0.001 training_neg_sample_num: 1 +training_neg_sample_distribution: uniform eval_step: 1 stopping_step: 10 clip_grad_norm: ~ diff --git a/recbole/quick_start/quick_start.py b/recbole/quick_start/quick_start.py index 2294ca15e..b7dcf5722 100644 --- a/recbole/quick_start/quick_start.py +++ b/recbole/quick_start/quick_start.py @@ -49,10 +49,11 @@ def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=Non trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model) # model training - best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, saved=saved) + best_valid_score, best_valid_result = trainer.fit(train_data, valid_data, saved=saved, + show_progress=config['show_progress']) # model evaluation - test_result = trainer.evaluate(test_data, load_best_model=saved) + test_result = trainer.evaluate(test_data, load_best_model=saved, show_progress=config['show_progress']) logger.info('best valid result: {}'.format(best_valid_result)) logger.info('test result: {}'.format(test_result)) diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py index 185770444..102a40b54 100644 --- a/recbole/trainer/trainer.py +++ b/recbole/trainer/trainer.py @@ -13,7 +13,7 @@ """ import os -import itertools +from tqdm import tqdm import torch import torch.optim as optim from torch.nn.utils.clip_grad import clip_grad_norm_ @@ -120,7 +120,7 @@ def _build_optimizer(self): optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate) return optimizer - def _train_epoch(self, train_data, epoch_idx, loss_func=None): + def _train_epoch(self, train_data, epoch_idx, loss_func=None, show_progress=False): r"""Train the model in an epoch Args: @@ -128,6 +128,7 @@ def _train_epoch(self, train_data, epoch_idx, loss_func=None): epoch_idx (int): The current epoch id. loss_func (function): The loss function of :attr:`model`. If it is ``None``, the loss function will be :attr:`self.model.calculate_loss`. Defaults to ``None``. + show_progress (bool): Show progress of epoch training. Defaults to ``False``. Returns: float/tuple: The sum of loss returned by all batches in this epoch. If the loss in each batch contains @@ -137,7 +138,16 @@ def _train_epoch(self, train_data, epoch_idx, loss_func=None): self.model.train() loss_func = loss_func or self.model.calculate_loss total_loss = None - for batch_idx, interaction in enumerate(train_data): + iter_data = ( + tqdm( + enumerate(train_data), + total=len(train_data), + desc=f"Train {epoch_idx:>5}", + ) + if show_progress + else enumerate(train_data) + ) + for batch_idx, interaction in iter_data: interaction = interaction.to(self.device) self.optimizer.zero_grad() losses = loss_func(interaction) @@ -155,17 +165,18 @@ def _train_epoch(self, train_data, epoch_idx, loss_func=None): self.optimizer.step() return total_loss - def _valid_epoch(self, valid_data): + def _valid_epoch(self, valid_data, show_progress=False): r"""Valid the model with valid data Args: - valid_data (DataLoader): the valid data + valid_data (DataLoader): the valid data. + show_progress (bool): Show progress of epoch evaluate. Defaults to ``False``. Returns: float: valid score dict: valid result """ - valid_result = self.evaluate(valid_data, load_best_model=False) + valid_result = self.evaluate(valid_data, load_best_model=False, show_progress=show_progress) valid_score = calculate_valid_score(valid_result, self.valid_metric) return valid_score, valid_result @@ -222,7 +233,7 @@ def _generate_train_loss_output(self, epoch_idx, s_time, e_time, losses): train_loss_output += 'train loss: %.4f' % losses return train_loss_output + ']' - def fit(self, train_data, valid_data=None, verbose=True, saved=True): + def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progress=False): r"""Train the model based on the train data and the valid data. Args: @@ -231,6 +242,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True): If it's None, the early_stopping is invalid. verbose (bool, optional): whether to write training and evaluation information to logger, default: True saved (bool, optional): whether to save the model parameters, default: True + show_progress (bool): Show progress of epoch training and evaluate. Defaults to ``False``. Returns: (float, dict): best valid score and best valid result. If valid_data is None, it returns (-1, None) @@ -241,7 +253,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True): for epoch_idx in range(self.start_epoch, self.epochs): # train training_start_time = time() - train_loss = self._train_epoch(train_data, epoch_idx) + train_loss = self._train_epoch(train_data, epoch_idx, show_progress=show_progress) self.train_loss_dict[epoch_idx] = sum(train_loss) if isinstance(train_loss, tuple) else train_loss training_end_time = time() train_loss_output = \ @@ -259,7 +271,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True): continue if (epoch_idx + 1) % self.eval_step == 0: valid_start_time = time() - valid_score, valid_result = self._valid_epoch(valid_data) + valid_score, valid_result = self._valid_epoch(valid_data, show_progress=show_progress) self.best_valid_score, self.cur_step, stop_flag, update_flag = early_stopping( valid_score, self.best_valid_score, self.cur_step, max_step=self.stopping_step, bigger=self.valid_metric_bigger) @@ -313,7 +325,7 @@ def _full_sort_batch_eval(self, batched_data): return interaction, scores @torch.no_grad() - def evaluate(self, eval_data, load_best_model=True, model_file=None): + def evaluate(self, eval_data, load_best_model=True, model_file=None, show_progress=False): r"""Evaluate the model based on the eval data. Args: @@ -322,6 +334,7 @@ def evaluate(self, eval_data, load_best_model=True, model_file=None): It should be set True, if users want to test the model after training. model_file (str, optional): the saved model file, default: None. If users want to test the previously trained model file, they can set this parameter. + show_progress (bool): Show progress of epoch evaluate. Defaults to ``False``. Returns: dict: eval result, key is the eval metric and value in the corresponding metric value @@ -347,7 +360,16 @@ def evaluate(self, eval_data, load_best_model=True, model_file=None): self.tot_item_num = eval_data.dataset.item_num batch_matrix_list = [] - for batch_idx, batched_data in enumerate(eval_data): + iter_data = ( + tqdm( + enumerate(eval_data), + total=len(eval_data), + desc=f"Evaluate ", + ) + if show_progress + else enumerate(eval_data) + ) + for batch_idx, batched_data in iter_data: if eval_data.dl_type == DataLoaderType.FULL: interaction, scores = self._full_sort_batch_eval(batched_data) else: @@ -413,7 +435,7 @@ def __init__(self, config, model): self.train_rec_step = config['train_rec_step'] self.train_kg_step = config['train_kg_step'] - def _train_epoch(self, train_data, epoch_idx, loss_func=None): + def _train_epoch(self, train_data, epoch_idx, loss_func=None, show_progress=False): if self.train_rec_step is None or self.train_kg_step is None: interaction_state = KGDataLoaderState.RSKG elif epoch_idx % (self.train_rec_step + self.train_kg_step) < self.train_rec_step: @@ -422,9 +444,11 @@ def _train_epoch(self, train_data, epoch_idx, loss_func=None): interaction_state = KGDataLoaderState.KG train_data.set_mode(interaction_state) if interaction_state in [KGDataLoaderState.RSKG, KGDataLoaderState.RS]: - return super()._train_epoch(train_data, epoch_idx) + return super()._train_epoch(train_data, epoch_idx, show_progress=show_progress) elif interaction_state in [KGDataLoaderState.KG]: - return super()._train_epoch(train_data, epoch_idx, self.model.calculate_kg_loss) + return super()._train_epoch(train_data, epoch_idx, + loss_func=self.model.calculate_kg_loss, + show_progress=show_progress) return None @@ -436,14 +460,16 @@ class KGATTrainer(Trainer): def __init__(self, config, model): super(KGATTrainer, self).__init__(config, model) - def _train_epoch(self, train_data, epoch_idx, loss_func=None): + def _train_epoch(self, train_data, epoch_idx, loss_func=None, show_progress=False): # train rs train_data.set_mode(KGDataLoaderState.RS) - rs_total_loss = super()._train_epoch(train_data, epoch_idx) + rs_total_loss = super()._train_epoch(train_data, epoch_idx, show_progress=show_progress) # train kg train_data.set_mode(KGDataLoaderState.KG) - kg_total_loss = super()._train_epoch(train_data, epoch_idx, self.model.calculate_kg_loss) + kg_total_loss = super()._train_epoch(train_data, epoch_idx, + loss_func=self.model.calculate_kg_loss, + show_progress=show_progress) # update A self.model.eval() @@ -478,12 +504,12 @@ def save_pretrained_model(self, epoch, saved_model_file): } torch.save(state, saved_model_file) - def pretrain(self, train_data, verbose=True): + def pretrain(self, train_data, verbose=True, show_progress=False): for epoch_idx in range(self.start_epoch, self.epochs): # train training_start_time = time() - train_loss = self._train_epoch(train_data, epoch_idx) + train_loss = self._train_epoch(train_data, epoch_idx, show_progress=show_progress) self.train_loss_dict[epoch_idx] = sum(train_loss) if isinstance(train_loss, tuple) else train_loss training_end_time = time() train_loss_output = \ @@ -502,11 +528,11 @@ def pretrain(self, train_data, verbose=True): return self.best_valid_score, self.best_valid_result - def fit(self, train_data, valid_data=None, verbose=True, saved=True): + def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progress=False): if self.model.train_stage == 'pretrain': - return self.pretrain(train_data, verbose) + return self.pretrain(train_data, verbose, show_progress) elif self.model.train_stage == 'finetune': - return super().fit(train_data, valid_data, verbose, saved) + return super().fit(train_data, valid_data, verbose, saved, show_progress) else: raise ValueError("Please make sure that the 'train_stage' is 'pretrain' or 'finetune' ") @@ -520,19 +546,23 @@ def __init__(self, config, model): super(MKRTrainer, self).__init__(config, model) self.kge_interval = config['kge_interval'] - def _train_epoch(self, train_data, epoch_idx, loss_func=None): + def _train_epoch(self, train_data, epoch_idx, loss_func=None, show_progress=False): rs_total_loss, kg_total_loss = 0., 0. # train rs self.logger.info('Train RS') train_data.set_mode(KGDataLoaderState.RS) - rs_total_loss = super()._train_epoch(train_data, epoch_idx, self.model.calculate_rs_loss) + rs_total_loss = super()._train_epoch(train_data, epoch_idx, + loss_func=self.model.calculate_rs_loss, + show_progress=show_progress) # train kg if epoch_idx % self.kge_interval == 0: self.logger.info('Train KG') train_data.set_mode(KGDataLoaderState.KG) - kg_total_loss = super()._train_epoch(train_data, epoch_idx, self.model.calculate_kg_loss) + kg_total_loss = super()._train_epoch(train_data, epoch_idx, + loss_func=self.model.calculate_kg_loss, + show_progress=show_progress) return rs_total_loss, kg_total_loss diff --git a/recbole/utils/argument_list.py b/recbole/utils/argument_list.py index 05f6ad18a..5bff7c26a 100644 --- a/recbole/utils/argument_list.py +++ b/recbole/utils/argument_list.py @@ -7,11 +7,13 @@ 'seed', 'reproducibility', 'state', - 'data_path'] + 'data_path', + 'show_progress'] training_arguments = ['epochs', 'train_batch_size', 'learner', 'learning_rate', 'training_neg_sample_num', + 'training_neg_sample_distribution', 'eval_step', 'stopping_step', 'checkpoint_dir'] From 3b73c4561c5c0cd747d33fd1a540bb6e5ad30d23 Mon Sep 17 00:00:00 2001 From: chenyushuo <297086016@qq.com> Date: Fri, 25 Dec 2020 17:41:08 +0800 Subject: [PATCH 5/5] FEA: Add callback function hook for end of training an epoch (resolve #517) --- recbole/trainer/trainer.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py index 102a40b54..53d82da37 100644 --- a/recbole/trainer/trainer.py +++ b/recbole/trainer/trainer.py @@ -233,7 +233,7 @@ def _generate_train_loss_output(self, epoch_idx, s_time, e_time, losses): train_loss_output += 'train loss: %.4f' % losses return train_loss_output + ']' - def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progress=False): + def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progress=False, callback_fn=None): r"""Train the model based on the train data and the valid data. Args: @@ -243,6 +243,8 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre verbose (bool, optional): whether to write training and evaluation information to logger, default: True saved (bool, optional): whether to save the model parameters, default: True show_progress (bool): Show progress of epoch training and evaluate. Defaults to ``False``. + callback_fn (callable): Optional callback function executed at end of epoch. + Includes (epoch_idx, valid_score) input arguments. Returns: (float, dict): best valid score and best valid result. If valid_data is None, it returns (-1, None) @@ -290,6 +292,9 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre self.logger.info(update_output) self.best_valid_result = valid_result + if callback_fn: + callback_fn(epoch_idx, valid_score) + if stop_flag: stop_output = 'Finished training, best eval result in epoch %d' % \ (epoch_idx - self.cur_step * self.eval_step) @@ -528,11 +533,11 @@ def pretrain(self, train_data, verbose=True, show_progress=False): return self.best_valid_score, self.best_valid_result - def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progress=False): + def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progress=False, callback_fn=None): if self.model.train_stage == 'pretrain': return self.pretrain(train_data, verbose, show_progress) elif self.model.train_stage == 'finetune': - return super().fit(train_data, valid_data, verbose, saved, show_progress) + return super().fit(train_data, valid_data, verbose, saved, show_progress, callback_fn) else: raise ValueError("Please make sure that the 'train_stage' is 'pretrain' or 'finetune' ")