From 5c781decf32cadb0137cfe7908bdd520f9451145 Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@ruc.edu.cn> Date: Sat, 6 Mar 2021 14:50:52 +0800 Subject: [PATCH 01/16] give logger a color --- .../developer_guide/customize_models.rst | 4 +- recbole/config/configurator.py | 12 +- recbole/config/eval_setting.py | 18 +- .../data/dataloader/abstract_dataloader.py | 2 +- recbole/data/dataloader/general_dataloader.py | 2 +- .../data/dataloader/knowledge_dataloader.py | 2 +- .../data/dataloader/sequential_dataloader.py | 2 +- recbole/data/dataloader/user_dataloader.py | 2 +- recbole/data/dataset/dataset.py | 66 +++--- recbole/data/dataset/kg_dataset.py | 10 +- recbole/data/dataset/sequential_dataset.py | 4 +- recbole/data/dataset/social_dataset.py | 4 +- recbole/data/utils.py | 203 +++++++++++------- recbole/evaluator/metrics.py | 16 +- recbole/model/abstract_recommender.py | 2 +- .../context_aware_recommender/xdeepfm.py | 4 +- recbole/model/general_recommender/fism.py | 6 +- recbole/model/general_recommender/gcmc.py | 2 +- recbole/model/general_recommender/nais.py | 6 +- recbole/quick_start/quick_start.py | 4 +- recbole/trainer/trainer.py | 32 +-- 21 files changed, 231 insertions(+), 172 deletions(-) diff --git a/docs/source/developer_guide/customize_models.rst b/docs/source/developer_guide/customize_models.rst index e6acc6c90..85567f800 100644 --- a/docs/source/developer_guide/customize_models.rst +++ b/docs/source/developer_guide/customize_models.rst @@ -255,8 +255,8 @@ Then, we can use NewModel in RecBole as follows (e.g., `run.py`): # model evaluation test_result = trainer.evaluate(test_data) - logger.info('best valid result: {}'.format(best_valid_result)) - logger.info('test result: {}'.format(test_result)) + logger.info('\033[1;34mbest valid result\033[0m: {}'.format(best_valid_result)) + logger.info('\033[1;34mtest result\033[0m: {}'.format(test_result)) Then, we can run NewModel: diff --git a/recbole/config/configurator.py b/recbole/config/configurator.py index 670184a09..065231ce3 100644 --- a/recbole/config/configurator.py +++ b/recbole/config/configurator.py @@ -159,7 +159,7 @@ def _load_cmd_line(self): cmd_config_dict[cmd_arg_name] = cmd_arg_value if len(unrecognized_args) > 0: logger = getLogger() - logger.warning('command line args [{}] will not be used in RecBole'.format(' '.join(unrecognized_args))) + logger.warning('\033[1;31mcommand line args [{}] will not be used in RecBole\033[0m'.format(' '.join(unrecognized_args))) cmd_config_dict = self._convert_config_dict(cmd_config_dict) return cmd_config_dict @@ -337,18 +337,18 @@ def __contains__(self, key): return key in self.final_config_dict def __str__(self): - args_info = '' + args_info = '\n' for category in self.parameters: - args_info += category + ' Hyper Parameters: \n' + args_info += '\033[1;35m' + category + ' Hyper Parameters: \033[0m\n' args_info += '\n'.join([ - "{}={}".format(arg, value) for arg, value in self.final_config_dict.items() + "\033[0;36m{}\033[0m = \033[33m{}\033[0m".format(arg, value) for arg, value in self.final_config_dict.items() if arg in self.parameters[category] ]) args_info += '\n\n' - args_info += 'Other Hyper Parameters: \n' + args_info += '\033[1;35mOther Hyper Parameters: \033[0m\n' args_info += '\n'.join([ - "{}={}".format(arg, value) for arg, value in self.final_config_dict.items() + "\033[0;36m{}\033[0m = \033[33m{}\033[0m".format(arg, value) for arg, value in self.final_config_dict.items() if arg not in sum(list(self.parameters.values()) + [['model', 'dataset', 'config_files']], []) ]) 
args_info += '\n\n' diff --git a/recbole/config/eval_setting.py b/recbole/config/eval_setting.py index cc1cc230d..5ee4b81db 100644 --- a/recbole/config/eval_setting.py +++ b/recbole/config/eval_setting.py @@ -90,27 +90,27 @@ def __init__(self, config): setattr(self, args, config[args]) def __str__(self): - info = ['Evaluation Setting:'] + info = ['\033[1;35mEvaluation Setting:\033[0m'] if self.group_field: - info.append('Group by {}'.format(self.group_field)) + info.append('\033[1;34mGroup by\033[0m {}'.format(self.group_field)) else: - info.append('No Grouping') + info.append('\033[0;33mNo Grouping\033[0m') if self.ordering_args is not None and self.ordering_args['strategy'] != 'none': - info.append('Ordering: {}'.format(self.ordering_args)) + info.append('\033[1;34mOrdering\033[0m: {}'.format(self.ordering_args)) else: - info.append('No Ordering') + info.append('\033[0;33mNo Ordering\033[0m') if self.split_args is not None and self.split_args['strategy'] != 'none': - info.append('Splitting: {}'.format(self.split_args)) + info.append('\033[1;34mSplitting\033[0m: {}'.format(self.split_args)) else: - info.append('No Splitting') + info.append('\033[0;33mNo Splitting\033[0m') if self.neg_sample_args is not None and self.neg_sample_args['strategy'] != 'none': - info.append('Negative Sampling: {}'.format(self.neg_sample_args)) + info.append('\033[1;34mNegative Sampling\033[0m: {}'.format(self.neg_sample_args)) else: - info.append('No Negative Sampling') + info.append('\033[0;33mNo Negative Sampling\033[0m') return '\n\t'.join(info) diff --git a/recbole/data/dataloader/abstract_dataloader.py b/recbole/data/dataloader/abstract_dataloader.py index 73e642472..43acf03a6 100644 --- a/recbole/data/dataloader/abstract_dataloader.py +++ b/recbole/data/dataloader/abstract_dataloader.py @@ -121,7 +121,7 @@ def set_batch_size(self, batch_size): raise PermissionError('Cannot change dataloader\'s batch_size while iteration') if self.batch_size != batch_size: self.batch_size = batch_size - self.logger.warning(f'Batch size is changed to {batch_size}.') + self.logger.warning(f'\033[1;31mBatch size is changed to {batch_size}\033[0m.') def upgrade_batch_size(self, batch_size): """Upgrade the batch_size of the dataloader, if input batch_size is bigger than current batch_size. 
diff --git a/recbole/data/dataloader/general_dataloader.py b/recbole/data/dataloader/general_dataloader.py index 818571586..4cc44011f 100644 --- a/recbole/data/dataloader/general_dataloader.py +++ b/recbole/data/dataloader/general_dataloader.py @@ -259,7 +259,7 @@ def pr_end(self): return len(self.uid_list) def _shuffle(self): - self.logger.warnning('GeneralFullDataLoader can\'t shuffle') + self.logger.warning('\033[1;31mGeneralFullDataLoader can\'t shuffle\033[0m') def _next_batch_data(self): user_df = self.user_df[self.pr:self.pr + self.step] diff --git a/recbole/data/dataloader/knowledge_dataloader.py b/recbole/data/dataloader/knowledge_dataloader.py index 6b6bb00ac..88396976c 100644 --- a/recbole/data/dataloader/knowledge_dataloader.py +++ b/recbole/data/dataloader/knowledge_dataloader.py @@ -55,7 +55,7 @@ def setup(self): """ if self.shuffle is False: self.shuffle = True - self.logger.warning('kg based dataloader must shuffle the data') + self.logger.warning('\033[1;31mkg based dataloader must shuffle the data\033[0m') @property def pr_end(self): diff --git a/recbole/data/dataloader/sequential_dataloader.py b/recbole/data/dataloader/sequential_dataloader.py index 51d3a845d..7ee37b05e 100644 --- a/recbole/data/dataloader/sequential_dataloader.py +++ b/recbole/data/dataloader/sequential_dataloader.py @@ -264,7 +264,7 @@ def _neg_sampling(self, inter_feat): pass def _shuffle(self): - self.logger.warnning('SequentialFullDataLoader can\'t shuffle') + self.logger.warning('\033[1;31mSequentialFullDataLoader can\'t shuffle\033[0m') def _next_batch_data(self): interaction = super()._next_batch_data() diff --git a/recbole/data/dataloader/user_dataloader.py b/recbole/data/dataloader/user_dataloader.py index 2d2fd62a0..cd86753aa 100644 --- a/recbole/data/dataloader/user_dataloader.py +++ b/recbole/data/dataloader/user_dataloader.py @@ -47,7 +47,7 @@ def setup(self): """ if self.shuffle is False: self.shuffle = True - self.logger.warning('UserDataLoader must shuffle the data') + self.logger.warning('\033[1;31mUserDataLoader must shuffle the data\033[0m') @property def pr_end(self): diff --git a/recbole/data/dataset/dataset.py b/recbole/data/dataset/dataset.py index 334ce14e1..37210434b 100644 --- a/recbole/data/dataset/dataset.py +++ b/recbole/data/dataset/dataset.py @@ -105,7 +105,7 @@ def _from_scratch(self): """Load dataset from scratch. Initialize attributes firstly, then load data from atomic files, pre-process the dataset lastly. """ - self.logger.debug(f'Loading {self.__class__} from scratch.') + self.logger.debug(f'\033[0;32mLoading {self.__class__} from scratch.\033[0m') self._get_preset() self._get_field_from_config() @@ -138,8 +138,8 @@ def _get_field_from_config(self): 'USER_ID_FIELD and ITEM_ID_FIELD need to be set at the same time or not set at the same time.' ) - self.logger.debug(f'uid_field: {self.uid_field}') - self.logger.debug(f'iid_field: {self.iid_field}') + self.logger.debug(f'\033[0;34muid_field\033[0m: {self.uid_field}') + self.logger.debug(f'\033[0;34miid_field\033[0m: {self.iid_field}') def _data_processing(self): """Data preprocessing, including: @@ -208,7 +208,7 @@ def _restore_saved_dataset(self, saved_dataset): """Restore saved dataset from ``saved_dataset``. Args: saved_dataset (str): path for the saved dataset.
""" - self.logger.debug(f'Restoring dataset from [{saved_dataset}].') + self.logger.debug(f'\033[0;32mRestoring dataset from [{saved_dataset}].\033[0m') if (saved_dataset is None) or (not os.path.isdir(saved_dataset)): raise ValueError(f'Filepath [{saved_dataset}] need to be a dir.') @@ -366,9 +366,9 @@ def _get_load_and_unload_col(self, source): if load_col and unload_col: raise ValueError(f'load_col [{load_col}] and unload_col [{unload_col}] can not be set the same time.') - self.logger.debug(f'[{source}]: ') - self.logger.debug(f'\t load_col: [{load_col}]') - self.logger.debug(f'\t unload_col: [{unload_col}]') + self.logger.debug(f'\033[0;35m[{source}]: \033[0m') + self.logger.debug(f'\t \033[0;34mload_col\033[0m: [{load_col}]') + self.logger.debug(f'\t \033[0;34munload_col\033[0m: [{unload_col}]') return load_col, unload_col def _load_feat(self, filepath, source): @@ -388,7 +388,7 @@ def _load_feat(self, filepath, source): Their length is limited only after calling :meth:`~_dict_to_interaction` or :meth:`~_dataframe_to_interaction` """ - self.logger.debug(f'Loading feature from [{filepath}] (source: [{source}]).') + self.logger.debug(f'\033[0;32mLoading feature from [{filepath}] (source: [{source}]).\033[0m') load_col, unload_col = self._get_load_and_unload_col(source) if load_col == set(): @@ -446,11 +446,11 @@ def _user_item_feat_preparation(self): if self.user_feat is not None: new_user_df = pd.DataFrame({self.uid_field: np.arange(self.user_num)}) self.user_feat = pd.merge(new_user_df, self.user_feat, on=self.uid_field, how='left') - self.logger.debug('ordering user features by user id.') + self.logger.debug('\033[0;32mordering user features by user id.\033[0m') if self.item_feat is not None: new_item_df = pd.DataFrame({self.iid_field: np.arange(self.item_num)}) self.item_feat = pd.merge(new_item_df, self.item_feat, on=self.iid_field, how='left') - self.logger.debug('ordering item features by user id.') + self.logger.debug('\033[0;32mordering item features by user id.\033[0m') def _preload_weight_matrix(self): """Transfer preload weight features into :class:`numpy.ndarray` with shape ``[id_token_length]`` @@ -505,8 +505,8 @@ def _preload_weight_matrix(self): matrix[pid] = prow[:max_len] else: self.logger.warning( - f'Field [{preload_value_field}] with type [{value_ftype}] is not `float` or `float_seq`, ' - f'which will not be handled by preload matrix.' + f'\033[1;31mField [{preload_value_field}] with type [{value_ftype}] is not `float` or `float_seq`, \033[0m' + f'\033[1;31mwhich will not be handled by preload matrix.\033[0m' ) continue self._preloaded_weight[preload_id_field] = matrix @@ -520,7 +520,7 @@ def _fill_nan(self): For fields with type :obj:`~recbole.utils.enum_type.FeatureType.FLOAT`, missing value will be filled by the average of original data. 
""" - self.logger.debug('Filling nan') + self.logger.debug('\033[0;32mFilling nan\033[0m') for feat_name in self.feat_name_list: feat = getattr(self, feat_name) @@ -554,13 +554,13 @@ def _normalize(self): if field not in self.field2type: raise ValueError(f'Field [{field}] does not exist.') elif ftype != FeatureType.FLOAT and ftype != FeatureType.FLOAT_SEQ: - self.logger.warning(f'{field} is not a FLOAT/FLOAT_SEQ feat, which will not be normalized.') + self.logger.warning(f'\033[1;31m{field} is not a FLOAT/FLOAT_SEQ feat, which will not be normalized.\033[0m') elif self.config['normalize_all']: fields = self.float_like_fields else: return - self.logger.debug(f'Normalized fields: {fields}') + self.logger.debug(f'\033[0;34mNormalized fields\033[0m: {fields}') for feat_name in self.feat_name_list: feat = getattr(self, feat_name) @@ -572,7 +572,7 @@ def _normalize(self): lst = feat[field].values mx, mn = max(lst), min(lst) if mx == mn: - self.logger.warning(f'All the same value in [{field}] from [{feat}_feat].') + self.logger.warning(f'\033[1;31mAll the same value in [{field}] from [{feat}_feat].\033[0m') feat[field] = 1.0 else: feat[field] = (lst - mn) / (mx - mn) @@ -581,7 +581,7 @@ def _normalize(self): lst = feat[field].agg(np.concatenate) mx, mn = max(lst), min(lst) if mx == mn: - self.logger.warning(f'All the same value in [{field}] from [{feat}_feat].') + self.logger.warning(f'\033[1;31mAll the same value in [{field}] from [{feat}_feat].\033[0m') lst = 1.0 else: lst = (lst - mn) / (mx - mn) @@ -597,14 +597,14 @@ def _filter_nan_user_or_item(self): dropped_feat = feat.index[feat[field].isnull()] if len(dropped_feat): self.logger.warning( - f'In {name}_feat, line {list(dropped_feat + 2)}, {field} do not exist, so they will be removed.' + f'\033[1;31mIn {name}_feat, line {list(dropped_feat + 2)}, {field} do not exist, so they will be removed.\033[0m' ) feat.drop(feat.index[dropped_feat], inplace=True) if field is not None: dropped_inter = self.inter_feat.index[self.inter_feat[field].isnull()] if len(dropped_inter): self.logger.warning( - f'In inter_feat, line {list(dropped_inter + 2)}, {field} do not exist, so they will be removed.' + f'\033[1;31mIn inter_feat, line {list(dropped_inter + 2)}, {field} do not exist, so they will be removed.\033[0m' ) self.inter_feat.drop(self.inter_feat.index[dropped_inter], inplace=True) @@ -629,8 +629,8 @@ def _remove_duplication(self): ) else: self.logger.warning( - f'Timestamp field has not been loaded or specified, ' - f'thus strategy [{keep}] of duplication removal may be meaningless.' 
+ f'\033[1;31mTimestamp field has not been loaded or specified, \033[0m' + f'\033[1;31mthus strategy [{keep}] of duplication removal may be meaningless.\033[0m' ) self.inter_feat.drop_duplicates(subset=[self.uid_field, self.iid_field], keep=keep, inplace=True) @@ -715,7 +715,7 @@ def _get_illegal_ids_by_inter_num(self, field, feat, inter_num, max_num=None, mi Returns: set: illegal ids, whose inter num out of [min_num, max_num] """ - self.logger.debug(f'get_illegal_ids_by_inter_num: field=[{field}], max_num=[{max_num}], min_num=[{min_num}]') + self.logger.debug(f'\033[0;34mget_illegal_ids_by_inter_num\033[0m: field=[{field}], max_num=[{max_num}], min_num=[{min_num}]') max_num = max_num or np.inf min_num = min_num or -1 @@ -760,7 +760,7 @@ def _drop_by_value(self, val, cmp): if val is None: return [] - self.logger.debug(f'drop_by_value: val={val}') + self.logger.debug(f'\033[0;34mdrop_by_value\033[0m: val={val}') filter_field = [] for field in val: if field not in self.field2type: @@ -908,7 +908,7 @@ def _remap_ID_all(self): """Get ``config['fields_in_same_space']`` firstly, and remap each. """ fields_in_same_space = self._get_fields_in_same_space() - self.logger.debug(f'fields_in_same_space: {fields_in_same_space}') + self.logger.debug(f'\033[0;34mfields_in_same_space\033[0m: {fields_in_same_space}') for field_set in fields_in_same_space: remap_list = self._get_remap_list(field_set) self._remap(remap_list) @@ -1206,19 +1206,19 @@ def __repr__(self): return self.__str__() def __str__(self): - info = [self.dataset_name] + info = ['\033[1;35m' + self.dataset_name + '\033[0m'] if self.uid_field: info.extend([ - f'The number of users: {self.user_num}', f'Average actions of users: {self.avg_actions_of_users}' + f'\033[0;34mThe number of users\033[0m: {self.user_num}', f'\033[0;34mAverage actions of users\033[0m: {self.avg_actions_of_users}' ]) if self.iid_field: info.extend([ - f'The number of items: {self.item_num}', f'Average actions of items: {self.avg_actions_of_items}' + f'\033[0;34mThe number of items\033[0m: {self.item_num}', f'\033[0;34mAverage actions of items\033[0m: {self.avg_actions_of_items}' ]) - info.append(f'The number of inters: {self.inter_num}') + info.append(f'\033[0;34mThe number of inters\033[0m: {self.inter_num}') if self.uid_field and self.iid_field: - info.append(f'The sparsity of the dataset: {self.sparsity * 100}%') - info.append(f'Remain Fields: {list(self.field2type)}') + info.append(f'\033[0;34mThe sparsity of the dataset\033[0m: {self.sparsity * 100}%') + info.append(f'\033[0;34mRemain Fields\033[0m: {list(self.field2type)}') return '\n'.join(info) def copy(self, new_inter_feat): @@ -1247,7 +1247,7 @@ def _drop_unused_col(self): for field in unused_fields: if field not in feat: self.logger.warning( - f'Field [{field}] is not in [{feat_name}_feat], which can not be set in `unused_col`.' + f'\033[1;31mField [{field}] is not in [{feat_name}_feat], which can not be set in `unused_col`.\033[0m' ) continue self._del_col(feat, field) @@ -1615,8 +1615,8 @@ def _history_matrix(self, row, value_field=None): col_num = np.max(history_len) if col_num > max_col_num * 0.2: self.logger.warning( - f'Max value of {row}\'s history interaction records has reached ' - f'{col_num / max_col_num * 100}% of the total.' 
+ f'\033[1;31mMax value of {row}\'s history interaction records has reached \033[0m' + f'\033[1;31m{col_num / max_col_num * 100}% of the total.\033[0m' ) history_matrix = np.zeros((row_num, col_num), dtype=np.int64) diff --git a/recbole/data/dataset/kg_dataset.py b/recbole/data/dataset/kg_dataset.py index 11b8f20f6..71b4c5536 100644 --- a/recbole/data/dataset/kg_dataset.py +++ b/recbole/data/dataset/kg_dataset.py @@ -80,8 +80,8 @@ def _get_field_from_config(self): self._check_field('head_entity_field', 'tail_entity_field', 'relation_field', 'entity_field') self.set_field_property(self.entity_field, FeatureType.TOKEN, FeatureSource.KG, 1) - self.logger.debug(f'relation_field: {self.relation_field}') - self.logger.debug(f'entity_field: {self.entity_field}') + self.logger.debug(f'\033[0;34mrelation_field\033[0m: {self.relation_field}') + self.logger.debug(f'\033[0;34mentity_field\033[0m: {self.entity_field}') def _data_processing(self): self._set_field2ent_level() @@ -138,7 +138,7 @@ def save(self, filepath): raise NotImplementedError() def _load_kg(self, token, dataset_path): - self.logger.debug(f'Loading kg from [{dataset_path}].') + self.logger.debug(f'\033[0;32mLoading kg from [{dataset_path}].\033[0m') kg_path = os.path.join(dataset_path, f'{token}.kg') if not os.path.isfile(kg_path): raise ValueError(f'[{token}.kg] not found in [{dataset_path}].') @@ -153,7 +153,7 @@ def _check_kg(self, kg): assert self.relation_field in kg, kg_warn_message.format(self.relation_field) def _load_link(self, token, dataset_path): - self.logger.debug(f'Loading link from [{dataset_path}].') + self.logger.debug(f'\033[0;32mLoading link from [{dataset_path}].\033[0m') link_path = os.path.join(dataset_path, f'{token}.link') if not os.path.isfile(link_path): raise ValueError(f'[{token}.link] not found in [{dataset_path}].') @@ -207,7 +207,7 @@ def _get_ent_fields_in_same_space(self): if self._contain_ent_field(field_set): field_set = self._remove_ent_field(field_set) ent_fields.update(field_set) - self.logger.debug(f'ent_fields: {fields_in_same_space}') + self.logger.debug(f'\033[0;34ment_fields\033[0m: {ent_fields}') return ent_fields def _remove_ent_field(self, field_set): diff --git a/recbole/data/dataset/sequential_dataset.py b/recbole/data/dataset/sequential_dataset.py index 98e0d7df3..02e4842be 100644 --- a/recbole/data/dataset/sequential_dataset.py +++ b/recbole/data/dataset/sequential_dataset.py @@ -123,8 +123,8 @@ def inter_matrix(self, form='coo', value_field=None): if not self.uid_field or not self.iid_field: raise ValueError('dataset does not exist uid/iid, thus can not converted to sparse matrix.') - self.logger.warning('Load interaction matrix may lead to label leakage from testing phase, this implementation ' 'only provides the interactions corresponding to specific phase') + self.logger.warning('\033[1;31mLoad interaction matrix may lead to label leakage from testing phase, this implementation \033[0m' '\033[1;31monly provides the interactions corresponding to specific phase\033[0m') local_inter_feat = self.inter_feat[self.uid_list] return self._create_sparse_matrix(local_inter_feat, self.uid_field, self.iid_field, form, value_field) diff --git a/recbole/data/dataset/social_dataset.py b/recbole/data/dataset/social_dataset.py index f53016ccf..281030328 100644 --- a/recbole/data/dataset/social_dataset.py +++ b/recbole/data/dataset/social_dataset.py @@ -45,8 +45,8 @@ def _get_field_from_config(self): self.target_field = self.config['TARGET_ID_FIELD'] self._check_field('source_field',
'target_field') - self.logger.debug(f'source_id_field: {self.source_field}') - self.logger.debug(f'target_id_field: {self.target_field}') + self.logger.debug(f'\033[0;34msource_id_field\033[0m: {self.source_field}') + self.logger.debug(f'\033[0;34mtarget_id_field\033[0m: {self.target_field}') def _load_data(self, token, dataset_path): """Load ``.net`` additionally. diff --git a/recbole/data/utils.py b/recbole/data/utils.py index 76a4c0cc4..ff9e9a819 100644 --- a/recbole/data/utils.py +++ b/recbole/data/utils.py @@ -77,56 +77,39 @@ def data_preparation(config, dataset, save=False): train_dataset, valid_dataset, test_dataset = built_datasets phases = ['train', 'valid', 'test'] sampler = None - logger = getLogger() - train_neg_sample_args = config['train_neg_sample_args'] - eval_neg_sample_args = es.neg_sample_args if save: save_datasets(config['checkpoint_dir'], name=phases, dataset=built_datasets) - # Training - train_kwargs = { - 'config': config, - 'dataset': train_dataset, - 'batch_size': config['train_batch_size'], - 'dl_format': config['MODEL_INPUT_TYPE'], - 'shuffle': True, - } - if train_neg_sample_args['strategy'] != 'none': + kwargs = {} + if config['training_neg_sample_num']: if dataset.label_field in dataset.inter_feat: raise ValueError( f'`training_neg_sample_num` should be 0 ' f'if inter_feat have label_field [{dataset.label_field}].' ) if model_type != ModelType.SEQUENTIAL: - sampler = Sampler(phases, built_datasets, train_neg_sample_args['distribution']) + sampler = Sampler(phases, built_datasets, config['train_neg_sample_args']['distribution']) else: - sampler = RepeatableSampler(phases, dataset, train_neg_sample_args['distribution']) - train_kwargs['sampler'] = sampler.set_phase('train') - train_kwargs['neg_sample_args'] = train_neg_sample_args + sampler = RepeatableSampler(phases, dataset, config['train_neg_sample_args']['distribution']) + kwargs['sampler'] = sampler.set_phase('train') + kwargs['neg_sample_args'] = copy.deepcopy(config['train_neg_sample_args']) if model_type == ModelType.KNOWLEDGE: - kg_sampler = KGSampler(dataset, train_neg_sample_args['distribution']) - train_kwargs['kg_sampler'] = kg_sampler - - dataloader = get_data_loader('train', config, train_neg_sample_args) - logger.info(f'Build [{dataloader.__name__}] for [train] with format [{train_kwargs["dl_format"]}]') - if train_neg_sample_args['strategy'] != 'none': - logger.info(f'[train] Negative Sampling: {train_neg_sample_args}') - else: - logger.info(f'[train] No Negative Sampling') - logger.info(f'[train] batch_size = [{train_kwargs["batch_size"]}], shuffle = [{train_kwargs["shuffle"]}]\n') - train_data = dataloader(**train_kwargs) - - # Evaluation - eval_kwargs = { - 'config': config, - 'batch_size': config['eval_batch_size'], - 'dl_format': InputType.POINTWISE, - 'shuffle': False, - } - valid_kwargs = {'dataset': valid_dataset} - test_kwargs = {'dataset': test_dataset} - if eval_neg_sample_args['strategy'] != 'none': + kg_sampler = KGSampler(dataset, config['train_neg_sample_args']['distribution']) + kwargs['kg_sampler'] = kg_sampler + train_data = dataloader_construct( + name='train', + config=config, + eval_setting=es, + dataset=train_dataset, + dl_format=config['MODEL_INPUT_TYPE'], + batch_size=config['train_batch_size'], + shuffle=True, + **kwargs + ) + + kwargs = {} + if es.neg_sample_args['strategy'] != 'none': if dataset.label_field in dataset.inter_feat: raise ValueError( f'It can not validate with `{es.es_str[1]}` ' @@ -134,26 +117,86 @@ def data_preparation(config, dataset, save=False): ) 
if sampler is None: if model_type != ModelType.SEQUENTIAL: - sampler = Sampler(phases, built_datasets, eval_neg_sample_args['distribution']) + sampler = Sampler(phases, built_datasets, es.neg_sample_args['distribution']) else: - sampler = RepeatableSampler(phases, dataset, eval_neg_sample_args['distribution']) - else: - sampler.set_distribution(eval_neg_sample_args['distribution']) - eval_kwargs['neg_sample_args'] = eval_neg_sample_args - valid_kwargs['sampler'] = sampler.set_phase('valid') - test_kwargs['sampler'] = sampler.set_phase('test') - valid_kwargs.update(eval_kwargs) - test_kwargs.update(eval_kwargs) + sampler = RepeatableSampler(phases, dataset, es.neg_sample_args['distribution']) + sampler.set_distribution(es.neg_sample_args['distribution']) + kwargs['sampler'] = [sampler.set_phase('valid'), sampler.set_phase('test')] + kwargs['neg_sample_args'] = copy.deepcopy(es.neg_sample_args) + valid_data, test_data = dataloader_construct( + name='evaluation', + config=config, + eval_setting=es, + dataset=[valid_dataset, test_dataset], + batch_size=config['eval_batch_size'], + **kwargs + ) - dataloader = get_data_loader('evaluation', config, eval_neg_sample_args) - logger.info(f'Build [{dataloader.__name__}] for [evaluation] with format [{eval_kwargs["dl_format"]}]') - logger.info(es) - logger.info(f'[evaluation] batch_size = [{eval_kwargs["batch_size"]}], shuffle = [{eval_kwargs["shuffle"]}]\n') + return train_data, valid_data, test_data - valid_data = dataloader(**valid_kwargs) - test_data = dataloader(**test_kwargs) - return train_data, valid_data, test_data +def dataloader_construct( + name, config, eval_setting, dataset, dl_format=InputType.POINTWISE, batch_size=1, shuffle=False, **kwargs +): + """Get a correct dataloader class by calling :func:`get_data_loader` to construct the dataloader. + + Args: + name (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'. + config (Config): An instance object of Config, used to record parameter information. + eval_setting (EvalSetting): An instance object of EvalSetting, used to record evaluation settings. + dataset (Dataset or list of Dataset): The split dataset for constructing dataloader. + dl_format (InputType, optional): The input type of dataloader. Defaults to + :obj:`~recbole.utils.enum_type.InputType.POINTWISE`. + batch_size (int, optional): The batch_size of dataloader. Defaults to ``1``. + shuffle (bool, optional): Whether the dataloader will be shuffled after a round. Defaults to ``False``. + **kwargs: Other input args of dataloader, such as :attr:`sampler`, :attr:`kg_sampler` + and :attr:`neg_sample_args`. The meaning of these args is the same as in the corresponding dataloaders. + + Returns: + AbstractDataLoader or list of AbstractDataLoader: Constructed dataloader in split dataset.
+ """ + if not isinstance(dataset, list): + dataset = [dataset] + + if not isinstance(batch_size, list): + batch_size = [batch_size] * len(dataset) + + if len(dataset) != len(batch_size): + raise ValueError(f'Dataset {dataset} and batch_size {batch_size} should have the same length.') + + kwargs_list = [{} for _ in range(len(dataset))] + for key, value in kwargs.items(): + key = [key] * len(dataset) + if not isinstance(value, list): + value = [value] * len(dataset) + if len(dataset) != len(value): + raise ValueError(f'Dataset {dataset} and {key} {value} should have the same length.') + for kw, k, w in zip(kwargs_list, key, value): + kw[k] = w + + model_type = config['MODEL_TYPE'] + logger = getLogger() + logger.info(f'\033[1;35mBuild\033[0m \033[1;33m[{model_type}]\033[0m DataLoader for \033[1;33m[{name}]\033[0m with format \033[1;33m[{dl_format}]\033[0m') + logger.info(eval_setting) + logger.info(f'\033[0;36mbatch_size\033[0m = \033[0;33m[{batch_size}]\033[0m, \033[0;36mshuffle\033[0m = \033[0;33m[{shuffle}]\033[0m\n') + + if 'neg_sample_args' in kwargs: + dataloader = get_data_loader(name, config, kwargs['neg_sample_args']) + else: + dataloader = get_data_loader(name, config, eval_setting.neg_sample_args) + + try: + ret = [ + dataloader(config=config, dataset=ds, batch_size=bs, dl_format=dl_format, shuffle=shuffle, **kw) + for ds, bs, kw in zip(dataset, batch_size, kwargs_list) + ] + except TypeError: + raise ValueError('training_neg_sample_num should be 0') + + if len(ret) == 1: + return ret[0] + else: + return ret def save_datasets(save_path, name, dataset): @@ -198,25 +241,39 @@ def get_data_loader(name, config, neg_sample_args): if config['model'] in register_table: return register_table[config['model']](name, config, neg_sample_args) - model_type_table = { - ModelType.GENERAL: 'General', - ModelType.TRADITIONAL: 'General', - ModelType.CONTEXT: 'Context', - ModelType.SEQUENTIAL: 'Sequential', - ModelType.DECISIONTREE: 'DecisionTree', - } - neg_sample_strategy_table = { - 'none': 'DataLoader', - 'by': 'NegSampleDataLoader', - 'full': 'FullDataLoader', - } model_type = config['MODEL_TYPE'] - neg_sample_strategy = neg_sample_args['strategy'] - dataloader_module = importlib.import_module('recbole.data.dataloader') - - if model_type in model_type_table and neg_sample_strategy in neg_sample_strategy_table: - dataloader_name = model_type_table[model_type] + neg_sample_strategy_table[neg_sample_strategy] - return getattr(dataloader_module, dataloader_name) + if name == 'train' and config['train_neg_sample_args'] != None: + neg_sample_strategy = config['train_neg_sample_args']['strategy'] + else: + neg_sample_strategy = neg_sample_args['strategy'] + if model_type == ModelType.GENERAL or model_type == ModelType.TRADITIONAL: + if neg_sample_strategy == 'none': + return GeneralDataLoader + elif neg_sample_strategy == 'by': + return GeneralNegSampleDataLoader + elif neg_sample_strategy == 'full': + return GeneralFullDataLoader + elif model_type == ModelType.CONTEXT: + if neg_sample_strategy == 'none': + return ContextDataLoader + elif neg_sample_strategy == 'by': + return ContextNegSampleDataLoader + elif neg_sample_strategy == 'full': + return ContextFullDataLoader + elif model_type == ModelType.SEQUENTIAL: + if neg_sample_strategy == 'none': + return SequentialDataLoader + elif neg_sample_strategy == 'by': + return SequentialNegSampleDataLoader + elif neg_sample_strategy == 'full': + return SequentialFullDataLoader + elif model_type == ModelType.DECISIONTREE: + if neg_sample_strategy == 'none': 
+ return DecisionTreeDataLoader + elif neg_sample_strategy == 'by': + return DecisionTreeNegSampleDataLoader + elif neg_sample_strategy == 'full': + return DecisionTreeFullDataLoader elif model_type == ModelType.KNOWLEDGE: if neg_sample_strategy == 'by': if name == 'train': return KnowledgeBasedDataLoader else: return GeneralNegSampleDataLoader elif neg_sample_strategy == 'full': return GeneralFullDataLoader elif neg_sample_strategy == 'none': + # return GeneralDataLoader + # TODO: can the training strategy also be 'none'? Judging from the general dataloader logic, 'none' seems to be allowed everywhere. raise NotImplementedError( 'The use of external negative sampling for knowledge model has not been implemented' ) diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index 9c470e12b..ed38891f8 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -195,17 +195,17 @@ def gauc_(user_len_list, pos_len_list, pos_rank_sum): if any_without_pos: logger = getLogger() logger.warning( - "No positive samples in some users, " - "true positive value should be meaningless, " - "these users have been removed from GAUC calculation" + "\033[1;31mNo positive samples in some users, \033[0m" + "\033[1;31mtrue positive value should be meaningless, \033[0m" + "\033[1;31mthese users have been removed from GAUC calculation\033[0m" ) non_zero_idx *= (pos_len_list != 0) if any_without_neg: logger = getLogger() logger.warning( - "No negative samples in some users, " - "false positive value should be meaningless, " - "these users have been removed from GAUC calculation" + "\033[1;31mNo negative samples in some users, \033[0m" + "\033[1;31mfalse positive value should be meaningless, \033[0m" + "\033[1;31mthese users have been removed from GAUC calculation\033[0m" ) non_zero_idx *= (neg_len_list != 0) if any_without_pos or any_without_neg: @@ -253,14 +253,14 @@ def auc_(trues, preds): if fps[-1] <= 0: logger = getLogger() - logger.warning("No negative samples in y_true, " "false positive value should be meaningless") + logger.warning("\033[1;31mNo negative samples in y_true,\033[0m " "\033[1;31mfalse positive value should be meaningless\033[0m") fpr = np.repeat(np.nan, fps.shape) else: fpr = fps / fps[-1] if tps[-1] <= 0: logger = getLogger() - logger.warning("No positive samples in y_true, " "true positive value should be meaningless") + logger.warning("\033[1;31mNo positive samples in y_true,\033[0m " "\033[1;31mtrue positive value should be meaningless\033[0m") tpr = np.repeat(np.nan, tps.shape) else: tpr = tps / tps[-1] diff --git a/recbole/model/abstract_recommender.py b/recbole/model/abstract_recommender.py index d5d7f8dc0..95f02f6ba 100644 --- a/recbole/model/abstract_recommender.py +++ b/recbole/model/abstract_recommender.py @@ -71,7 +71,7 @@ def __str__(self): """ model_parameters = filter(lambda p: p.requires_grad, self.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) - return super().__str__() + '\nTrainable parameters: {}'.format(params) + return super().__str__() + '\n\033[1;34mTrainable parameters\033[0m: {}'.format(params) class GeneralRecommender(AbstractRecommender): diff --git a/recbole/model/context_aware_recommender/xdeepfm.py b/recbole/model/context_aware_recommender/xdeepfm.py index 0af6ef770..74f95f395 100644 --- a/recbole/model/context_aware_recommender/xdeepfm.py +++ b/recbole/model/context_aware_recommender/xdeepfm.py @@ -49,8 +49,8 @@ def __init__(self, config, dataset): self.cin_layer_size = list(map(lambda x: int(x // 2 * 2), temp_cin_size)) if self.cin_layer_size[:-1] != temp_cin_size[:-1]: self.logger.warning( -
'Layer size of CIN should be even except for the last layer when direct is True.' - 'It is changed to {}'.format(self.cin_layer_size) + '\033[1;31mLayer size of CIN should be even except for the last layer when direct is True.\033[0m' + '\033[1;31mIt is changed to {}\033[0m'.format(self.cin_layer_size) ) # Create a convolutional layer for each CIN layer diff --git a/recbole/model/general_recommender/fism.py b/recbole/model/general_recommender/fism.py index fdfecc216..02b165275 100644 --- a/recbole/model/general_recommender/fism.py +++ b/recbole/model/general_recommender/fism.py @@ -49,9 +49,9 @@ def __init__(self, config, dataset): if self.split_to > 0: self.group = torch.chunk(torch.arange(self.n_items).to(self.device), self.split_to) else: - self.logger.warning('Pay Attetion!! the `split_to` is set to 0. If you catch a OMM error in this case, ' + \ - 'you need to increase it \n\t\t\tuntil the error disappears. For example, ' + \ - 'you can append it in the command line such as `--split_to=5`') + self.logger.warning('\033[1;31mPay Attention!! the `split_to` is set to 0. If you catch an OOM error in this case,\033[0m ' + \ + '\033[1;31myou need to increase it \n\t\t\tuntil the error disappears. For example, \033[0m' + \ + '\033[1;31myou can append it in the command line such as `--split_to=5`\033[0m') # define layers and loss # construct source and destination item embedding matrix diff --git a/recbole/model/general_recommender/gcmc.py b/recbole/model/general_recommender/gcmc.py index e3715493d..82fde721b 100644 --- a/recbole/model/general_recommender/gcmc.py +++ b/recbole/model/general_recommender/gcmc.py @@ -93,7 +93,7 @@ def __init__(self, config, dataset): div = self.gcn_output_dim // len(self.support) if self.gcn_output_dim % len(self.support) != 0: self.logger.warning( - "HIDDEN[0] (=%d) of stack layer is adjusted to %d (in %d splits)." % + "\033[1;31mHIDDEN[0] (=%d) of stack layer is adjusted to %d (in %d splits).\033[0m" % (self.gcn_output_dim, len(self.support) * div, len(self.support)) ) self.gcn_output_dim = len(self.support) * div diff --git a/recbole/model/general_recommender/nais.py b/recbole/model/general_recommender/nais.py index c59cf594b..a9237db3a 100644 --- a/recbole/model/general_recommender/nais.py +++ b/recbole/model/general_recommender/nais.py @@ -64,9 +64,9 @@ def __init__(self, config, dataset): self.logger.info('split the n_items to {} pieces'.format(self.split_to)) self.group = torch.chunk(torch.arange(self.n_items).to(self.device), self.split_to) else: - self.logger.warning('Pay Attetion!! the `split_to` is set to 0. If you catch a OMM error in this case, ' + \ - 'you need to increase it \n\t\t\tuntil the error disappears. For example, ' + \ - 'you can append it in the command line such as `--split_to=5`') + self.logger.warning('\033[1;31mPay Attention!! the `split_to` is set to 0. If you catch an OOM error in this case,\033[0m ' + \ + '\033[1;31myou need to increase it \n\t\t\tuntil the error disappears. 
For example,\033[0m ' + \ + '\033[1;31myou can append it in the command line such as `--split_to=5`\033[0m') # define layers and loss # construct source and destination item embedding matrix diff --git a/recbole/quick_start/quick_start.py b/recbole/quick_start/quick_start.py index 66aae1c1c..7062e584f 100644 --- a/recbole/quick_start/quick_start.py +++ b/recbole/quick_start/quick_start.py @@ -57,8 +57,8 @@ def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=Non # model evaluation test_result = trainer.evaluate(test_data, load_best_model=saved, show_progress=config['show_progress']) - logger.info('best valid result: {}'.format(best_valid_result)) - logger.info('test result: {}'.format(test_result)) + logger.info('\033[1;33mbest valid result\033[0m: {}'.format(best_valid_result)) + logger.info('\033[1;33mtest result\033[0m: {}'.format(test_result)) return { 'best_valid_score': best_valid_score, diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py index 9f9bfd9b5..8b3effff8 100644 --- a/recbole/trainer/trainer.py +++ b/recbole/trainer/trainer.py @@ -116,9 +116,9 @@ def _build_optimizer(self): elif self.learner.lower() == 'sparse_adam': optimizer = optim.SparseAdam(self.model.parameters(), lr=self.learning_rate) if self.weight_decay > 0: - self.logger.warning('Sparse Adam cannot argument received argument [{weight_decay}]') + self.logger.warning('\033[1;31mSparse Adam does not support the argument [weight_decay]\033[0m') else: - self.logger.warning('Received unrecognized optimizer, set default Adam optimizer') + self.logger.warning('\033[1;31mReceived unrecognized optimizer, set default Adam optimizer\033[0m') optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate) return optimizer @@ -144,7 +144,7 @@ def _train_epoch(self, train_data, epoch_idx, loss_func=None, show_progress=Fals tqdm( enumerate(train_data), total=len(train_data), - desc=f"Train {epoch_idx:>5}", + desc=f"\033[1;35mTrain {epoch_idx:>5}\033[0m", ) if show_progress else enumerate(train_data) ) for batch_idx, interaction in iter_data: @@ -213,8 +213,8 @@ def resume_checkpoint(self, resume_file): # load architecture params from checkpoint if checkpoint['config']['model'].lower() != self.config['model'].lower(): self.logger.warning( - 'Architecture configuration given in config file is different from that of checkpoint. ' - 'This may yield an exception while state_dict is being loaded.' + '\033[1;31mArchitecture configuration given in config file is different from that of checkpoint.\033[0m ' + '\033[1;31mThis may yield an exception while state_dict is being loaded.\033[0m' ) self.model.load_state_dict(checkpoint['state_dict']) @@ -229,13 +229,13 @@ def _check_nan(self, loss): def _generate_train_loss_output(self, epoch_idx, s_time, e_time, losses): des = self.config['loss_decimal_place'] or 4 - train_loss_output = 'epoch %d training [time: %.2fs, ' % (epoch_idx, e_time - s_time) + train_loss_output = '\033[1;32mepoch %d training\033[0m [\033[1;34mtime\033[0m: %.2fs, ' % (epoch_idx, e_time - s_time) if isinstance(losses, tuple): - des = 'train_loss%d: %.' + str(des) + 'f' + des = '\033[1;34mtrain_loss%d\033[0m: %.' + str(des) + 'f' train_loss_output += ', '.join(des % (idx + 1, loss) for idx, loss in enumerate(losses)) else: des = '%.'
+ str(des) + 'f' - train_loss_output += 'train loss:' + des % losses + train_loss_output += '\033[1;34mtrain loss\033[0m:' + des % losses return train_loss_output + ']' def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progress=False, callback_fn=None): @@ -272,7 +272,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre if self.eval_step <= 0 or not valid_data: if saved: self._save_checkpoint(epoch_idx) - update_output = 'Saving current: %s' % self.saved_model_file + update_output = '\033[1;32mSaving current\033[0m: %s' % self.saved_model_file if verbose: self.logger.info(update_output) continue @@ -287,16 +287,16 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre bigger=self.valid_metric_bigger ) valid_end_time = time() - valid_score_output = "epoch %d evaluating [time: %.2fs, valid_score: %f]" % \ + valid_score_output = "\033[1;32mepoch %d evaluating\033[0m [\033[1;34mtime\033[0m: %.2fs, \033[1;34mvalid_score\033[0m: %f]" % \ (epoch_idx, valid_end_time - valid_start_time, valid_score) - valid_result_output = 'valid result: \n' + dict2str(valid_result) + valid_result_output = '\033[1;34mvalid result\033[0m: \n' + dict2str(valid_result) if verbose: self.logger.info(valid_score_output) self.logger.info(valid_result_output) if update_flag: if saved: self._save_checkpoint(epoch_idx) - update_output = 'Saving current best: %s' % self.saved_model_file + update_output = '\033[1;34mSaving current best\033[0m: %s' % self.saved_model_file if verbose: self.logger.info(update_output) self.best_valid_result = valid_result @@ -380,7 +380,7 @@ def evaluate(self, eval_data, load_best_model=True, model_file=None, show_progre tqdm( enumerate(eval_data), total=len(eval_data), - desc=f"Evaluate ", + desc=f"\033[1;35mEvaluate\033[0m ", ) if show_progress else enumerate(eval_data) ) for batch_idx, batched_data in iter_data: @@ -541,7 +541,7 @@ def pretrain(self, train_data, verbose=True, show_progress=False): '{}-{}-{}.pth'.format(self.config['model'], self.config['dataset'], str(epoch_idx + 1)) ) self.save_pretrained_model(epoch_idx, saved_model_file) - update_output = 'Saving current: %s' % saved_model_file + update_output = '\033[0;34mSaving current\033[0m: %s' % saved_model_file if verbose: self.logger.info(update_output) @@ -701,9 +701,9 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre valid_start_time = time() valid_result, valid_score = self._valid_epoch(valid_data) valid_end_time = time() - valid_score_output = "epoch %d evaluating [time: %.2fs, valid_score: %f]" % \ + valid_score_output = "\033[0;34mepoch %d evaluating [time\033[0m: %.2fs, valid_score: %f]" % \ (epoch_idx, valid_end_time - valid_start_time, valid_score) - valid_result_output = 'valid result: \n' + dict2str(valid_result) + valid_result_output = '\033[0;34mvalid result\033[0m: \n' + dict2str(valid_result) if verbose: self.logger.info(valid_score_output) self.logger.info(valid_result_output) From ab17ddbdf266e43ed63051cd22f5621d516e2017 Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Sat, 6 Mar 2021 15:44:53 +0800 Subject: [PATCH 02/16] give logger a color --- docs/source/developer_guide/customize_models.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/developer_guide/customize_models.rst b/docs/source/developer_guide/customize_models.rst index 85567f800..e6acc6c90 100644 --- a/docs/source/developer_guide/customize_models.rst +++ 
b/docs/source/developer_guide/customize_models.rst @@ -255,8 +255,8 @@ Then, we can use NewModel in RecBole as follows (e.g., `run.py`): # model evaluation test_result = trainer.evaluate(test_data) - logger.info('\033[1;34mbest valid result\033[0m: {}'.format(best_valid_result)) - logger.info('\033[1;34mtest result\033[0m: {}'.format(test_result)) + logger.info('best valid result: {}'.format(best_valid_result)) + logger.info('test result: {}'.format(test_result)) Then, we can run NewModel: From d309d463e602e6eb5339cb8ee6a7c67d26211393 Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Sun, 7 Mar 2021 00:29:12 +0800 Subject: [PATCH 03/16] change color --- recbole/config/configurator.py | 22 +- recbole/config/eval_setting.py | 30 +-- .../data/dataloader/abstract_dataloader.py | 10 +- recbole/data/dataloader/general_dataloader.py | 2 +- .../data/dataloader/knowledge_dataloader.py | 8 +- recbole/data/dataloader/neg_sample_mixin.py | 18 +- recbole/data/dataloader/user_dataloader.py | 2 +- recbole/data/dataset/dataset.py | 106 ++++---- recbole/data/dataset/kg_dataset.py | 14 +- recbole/data/dataset/sequential_dataset.py | 18 +- recbole/data/dataset/social_dataset.py | 6 +- recbole/data/interaction.py | 24 +- recbole/data/utils.py | 236 ++++++------------ recbole/evaluator/abstract_evaluator.py | 4 +- recbole/evaluator/evaluators.py | 6 +- recbole/evaluator/metrics.py | 66 +---- recbole/evaluator/proxy_evaluator.py | 4 +- recbole/evaluator/utils.py | 2 +- recbole/model/abstract_recommender.py | 2 +- .../context_aware_recommender/xdeepfm.py | 4 +- recbole/model/general_recommender/cdae.py | 6 +- recbole/model/general_recommender/dmf.py | 2 +- recbole/model/general_recommender/fism.py | 6 +- recbole/model/general_recommender/gcmc.py | 2 +- recbole/model/general_recommender/nais.py | 8 +- recbole/model/general_recommender/neumf.py | 2 +- recbole/model/general_recommender/nncf.py | 4 +- recbole/model/layers.py | 12 +- .../model/sequential_recommender/bert4rec.py | 4 +- recbole/model/sequential_recommender/caser.py | 2 +- recbole/model/sequential_recommender/fdsa.py | 2 +- .../model/sequential_recommender/fossil.py | 2 +- recbole/model/sequential_recommender/gcsan.py | 2 +- .../model/sequential_recommender/gru4rec.py | 2 +- .../model/sequential_recommender/gru4recf.py | 2 +- .../model/sequential_recommender/gru4reckg.py | 2 +- recbole/model/sequential_recommender/hgn.py | 4 +- recbole/model/sequential_recommender/hrm.py | 2 +- recbole/model/sequential_recommender/ksr.py | 2 +- recbole/model/sequential_recommender/narm.py | 2 +- .../model/sequential_recommender/nextitnet.py | 2 +- recbole/model/sequential_recommender/npe.py | 2 +- recbole/model/sequential_recommender/s3rec.py | 2 +- .../model/sequential_recommender/sasrec.py | 2 +- .../model/sequential_recommender/sasrecf.py | 2 +- recbole/model/sequential_recommender/shan.py | 2 +- recbole/model/sequential_recommender/srgnn.py | 2 +- recbole/model/sequential_recommender/stamp.py | 2 +- recbole/quick_start/quick_start.py | 1 + recbole/sampler/sampler.py | 32 +-- recbole/trainer/hyper_tuning.py | 12 +- recbole/trainer/trainer.py | 16 +- recbole/utils/utils.py | 2 +- 53 files changed, 306 insertions(+), 427 deletions(-) diff --git a/recbole/config/configurator.py b/recbole/config/configurator.py index 1f96feaf4..49f68eb58 100644 --- a/recbole/config/configurator.py +++ b/recbole/config/configurator.py @@ -154,12 +154,12 @@ def _load_cmd_line(self): continue cmd_arg_name, cmd_arg_value = arg[2:].split("=") if cmd_arg_name in 
cmd_config_dict and cmd_arg_value != cmd_config_dict[cmd_arg_name]: - raise SyntaxError("There are duplicate commend arg '%s' with different value." % arg) + raise SyntaxError("\033[1;31mThere are duplicate command arg '%s' with different value.\033[0m" % arg) else: cmd_config_dict[cmd_arg_name] = cmd_arg_value if len(unrecognized_args) > 0: logger = getLogger() - logger.warning('\033[1;31mcommand line args [{}] will not be used in RecBole\033[0m'.format(' '.join(unrecognized_args))) + logger.warning('\033[1;33mcommand line args [{}] will not be used in RecBole\033[0m'.format(' '.join(unrecognized_args))) cmd_config_dict = self._convert_config_dict(cmd_config_dict) return cmd_config_dict @@ -177,8 +177,8 @@ def _get_model_and_dataset(self, model, dataset): model = self.external_config_dict['model'] except KeyError: raise KeyError( - 'model need to be specified in at least one of the these ways: ' - '[model variable, config file, config dict, command line] ' + '\033[1;31mmodel needs to be specified in at least one of these ways: \033[0m' + '\033[1;31m[model variable, config file, config dict, command line] \033[0m' ) if not isinstance(model, str): final_model_class = model @@ -192,8 +192,8 @@ final_dataset = self.external_config_dict['dataset'] except KeyError: raise KeyError( - 'dataset need to be specified in at least one of the these ways: ' - '[dataset variable, config file, config dict, command line] ' + '\033[1;31mdataset needs to be specified in at least one of these ways: \033[0m' + '\033[1;31m[dataset variable, config file, config dict, command line] \033[0m' ) else: final_dataset = dataset @@ -279,18 +279,18 @@ def _set_default_parameters(self): elif self.final_config_dict['loss_type'] in ['BPR']: self.final_config_dict['MODEL_INPUT_TYPE'] = InputType.PAIRWISE else: - raise ValueError('Either Model has attr \'input_type\',' 'or arg \'loss_type\' should exist in config.') + raise ValueError('\033[1;31mEither Model has attr \'input_type\', ' 'or arg \'loss_type\' should exist in config.\033[0m') eval_type = None for metric in self.final_config_dict['metrics']: if metric.lower() in individual_metrics: if eval_type is not None and eval_type == EvaluatorType.RANKING: - raise RuntimeError('Ranking metrics and other metrics can not be used at the same time.') + raise RuntimeError('\033[1;31mRanking metrics and other metrics can not be used at the same time.\033[0m') else: eval_type = EvaluatorType.INDIVIDUAL if metric.lower() in group_metrics: if eval_type is not None and eval_type == EvaluatorType.INDIVIDUAL: - raise RuntimeError('\033[1;31mRanking metrics and other metrics can not be used at the same time.\033[0m') + raise RuntimeError('\033[1;31mRanking metrics and other metrics can not be used at the same time.\033[0m') else: eval_type = EvaluatorType.RANKING self.final_config_dict['eval_type'] = eval_type @@ -322,7 +322,7 @@ def _set_train_neg_sample_args(self): def __setitem__(self, key, value): if not isinstance(key, str): - raise TypeError("index must be a str.") + raise TypeError("\033[1;31mindex must be a str.\033[0m") self.final_config_dict[key] = value def __getitem__(self, item): @@ -333,7 +333,7 @@ def __contains__(self, key): if not isinstance(key, str): - raise TypeError("index must be a str.") + raise TypeError("\033[1;31mindex must be a str.\033[0m") return key in self.final_config_dict def __str__(self): diff --git a/recbole/config/eval_setting.py index
5ee4b81db..7b22bcaad 100644 --- a/recbole/config/eval_setting.py +++ b/recbole/config/eval_setting.py @@ -82,7 +82,7 @@ def __init__(self, config): self.set_ordering_and_splitting(self.es_str[0]) if len(self.es_str) > 1: if getattr(self, self.es_str[1], None) == None: - raise ValueError('Incorrect setting of negative sampling.') + raise ValueError('\033[1;31mIncorrect setting of negative sampling.\033[0m') getattr(self, self.es_str[1])() presetting_args = ['group_field', 'ordering_args', 'split_args', 'neg_sample_args'] for args in presetting_args: @@ -159,7 +159,7 @@ def set_ordering(self, strategy='none', **kwargs): """ legal_strategy = {'none', 'shuffle', 'by'} if strategy not in legal_strategy: - raise ValueError('Ordering Strategy [{}] should in {}'.format(strategy, list(legal_strategy))) + raise ValueError('\033[1;31mOrdering Strategy [{}] should be in {}\033[0m'.format(strategy, list(legal_strategy))) self.ordering_args = {'strategy': strategy} self.ordering_args.update(kwargs) @@ -208,9 +208,9 @@ def set_splitting(self, strategy='none', **kwargs): """ legal_strategy = {'none', 'by_ratio', 'by_value', 'loo'} if strategy not in legal_strategy: - raise ValueError('Split Strategy [{}] should in {}'.format(strategy, list(legal_strategy))) + raise ValueError('\033[1;31mSplit Strategy [{}] should be in {}\033[0m'.format(strategy, list(legal_strategy))) if strategy == 'loo' and self.group_field is None: - raise ValueError('Leave-One-Out request group firstly') + raise ValueError('\033[1;31mLeave-One-Out requires grouping first\033[0m') self.split_args = {'strategy': strategy} self.split_args.update(kwargs) @@ -225,7 +225,7 @@ def leave_one_out(self, leave_one_num=1): E.g. ``leave_one_num = 2`` if you have one validation dataset and one test dataset. """ if self.group_field is None: - raise ValueError('Leave one out request grouped dataset, please set group field.') + raise ValueError('\033[1;31mLeave-one-out requires a grouped dataset, please set the group field.\033[0m') self.set_splitting(strategy='loo', leave_one_num=leave_one_num) def split_by_ratio(self, ratios): @@ -236,13 +236,13 @@ def split_by_ratio(self, ratios): No need to normalize.
It's ok with either `[0.8, 0.1, 0.1]`, `[8, 1, 1]` or `[56, 7, 7]` """ if not isinstance(ratios, list): - raise ValueError('ratios [{}] should be list'.format(ratios)) + raise ValueError('\033[1;31mratios [{}] should be a list\033[0m'.format(ratios)) self.set_splitting(strategy='by_ratio', ratios=ratios) def _split_by_value(self, field, values, ascending=True): - raise NotImplementedError('Split by value has not been implemented.') + raise NotImplementedError('\033[1;31mSplit by value has not been implemented.\033[0m') if not isinstance(field, str): - raise ValueError('field [{}] should be str'.format(field)) + raise ValueError('\033[1;31mfield [{}] should be a str\033[0m'.format(field)) if not isinstance(values, list): values = [values] values.sort(reverse=(not ascending)) @@ -262,9 +262,9 @@ def set_neg_sampling(self, strategy='none', distribution='uniform', **kwargs): """ legal_strategy = {'none', 'full', 'by'} if strategy not in legal_strategy: - raise ValueError('Negative Sampling Strategy [{}] should in {}'.format(strategy, list(legal_strategy))) + raise ValueError('\033[1;31mNegative Sampling Strategy [{}] should be in {}\033[0m'.format(strategy, list(legal_strategy))) if strategy == 'full' and distribution != 'uniform': - raise ValueError('Full Sort can not be sampled by distribution [{}]'.format(distribution)) + raise ValueError('\033[1;31mFull Sort can not be sampled by distribution [{}]\033[0m'.format(distribution)) self.neg_sample_args = {'strategy': strategy, 'distribution': distribution} self.neg_sample_args.update(kwargs) @@ -285,7 +285,7 @@ def set_ordering_and_splitting(self, es_str): """ args = es_str.split('_') if len(args) != 2: - raise ValueError(f'`{es_str}` is invalid eval_setting.') + raise ValueError(f'\033[1;31m`{es_str}` is invalid eval_setting.\033[0m') ordering_args, split_args = args if self.config['group_by_user']: @@ -296,20 +296,20 @@ elif ordering_args == 'TO': self.temporal_ordering() else: - raise NotImplementedError(f'Ordering args `{ordering_args}` is not implemented.') + raise NotImplementedError(f'\033[1;31mOrdering args `{ordering_args}` is not implemented.\033[0m') if split_args == 'RS': ratios = self.config['split_ratio'] if ratios is None: - raise ValueError('`ratios` should be set if `RS` is set.') + raise ValueError('\033[1;31m`ratios` should be set if `RS` is set.\033[0m') self.split_by_ratio(ratios) elif split_args == 'LS': leave_one_num = self.config['leave_one_num'] if leave_one_num is None: - raise ValueError('`leave_one_num` should be set if `LS` is set.') + raise ValueError('\033[1;31m`leave_one_num` should be set if `LS` is set.\033[0m') self.leave_one_out(leave_one_num=leave_one_num) else: - raise NotImplementedError(f'Split args `{split_args}` is not implemented.') + raise NotImplementedError(f'\033[1;31mSplit args `{split_args}` is not implemented.\033[0m') def RO_RS(self, ratios=(0.8, 0.1, 0.1), group_by_user=True): """Preset about Random Ordering and Ratio-based Splitting.
diff --git a/recbole/data/dataloader/abstract_dataloader.py b/recbole/data/dataloader/abstract_dataloader.py index 43acf03a6..2a28850f2 100644 --- a/recbole/data/dataloader/abstract_dataloader.py +++ b/recbole/data/dataloader/abstract_dataloader.py @@ -96,12 +96,12 @@ def __next__(self): @property def pr_end(self): """This property marks the end of dataloader.pr which is used in :meth:`__next__()`.""" - raise NotImplementedError('Method [pr_end] should be implemented') + raise NotImplementedError('\033[1;31mMethod [pr_end] should be implemented\033[0m') def _shuffle(self): """Shuffle the order of data, and it will be called by :meth:`__iter__()` if self.shuffle is True. """ - raise NotImplementedError('Method [shuffle] should be implemented.') + raise NotImplementedError('\033[1;31mMethod [shuffle] should be implemented.\033[0m') def _next_batch_data(self): """Assemble next batch of data in form of Interaction, and return these data. @@ -109,7 +109,7 @@ def _next_batch_data(self): Returns: Interaction: The next batch of data. """ - raise NotImplementedError('Method [next_batch_data] should be implemented.') + raise NotImplementedError('\033[1;31mMethod [next_batch_data] should be implemented.\033[0m') def set_batch_size(self, batch_size): """Reset the batch_size of the dataloader, but it can't be called when dataloader is being iterated. @@ -118,10 +118,10 @@ def set_batch_size(self, batch_size): batch_size (int): the new batch_size of dataloader. """ if self.pr != 0: - raise PermissionError('Cannot change dataloader\'s batch_size while iteration') + raise PermissionError('\033[1;31mCannot change dataloader\'s batch_size while iteration\033[0m') if self.batch_size != batch_size: self.batch_size = batch_size - self.logger.warning(f'\033[1;31mBatch size is changed to {batch_size}\033[0m.') + self.logger.warning(f'\033[1;33mBatch size is changed to {batch_size}\033[0m.') def upgrade_batch_size(self, batch_size): """Upgrade the batch_size of the dataloader, if input batch_size is bigger than current batch_size. 
diff --git a/recbole/data/dataloader/general_dataloader.py b/recbole/data/dataloader/general_dataloader.py index 4cc44011f..559677b55 100644 --- a/recbole/data/dataloader/general_dataloader.py +++ b/recbole/data/dataloader/general_dataloader.py @@ -207,7 +207,7 @@ def __init__( self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False ): if neg_sample_args['strategy'] != 'full': - raise ValueError('neg_sample strategy in GeneralFullDataLoader() should be `full`') + raise ValueError('\033[1;31mneg_sample strategy in GeneralFullDataLoader() should be `full`\033[0m') uid_field = dataset.uid_field iid_field = dataset.iid_field diff --git a/recbole/data/dataloader/knowledge_dataloader.py b/recbole/data/dataloader/knowledge_dataloader.py index 88396976c..dcb0d2d06 100644 --- a/recbole/data/dataloader/knowledge_dataloader.py +++ b/recbole/data/dataloader/knowledge_dataloader.py @@ -55,7 +55,7 @@ def setup(self): """ if self.shuffle is False: self.shuffle = True - self.logger.warning('\033[1;31mkg based dataloader must shuffle the data\033[0m') + self.logger.warning('\033[1;33mkg based dataloader must shuffle the data\033[0m') @property def pr_end(self): @@ -145,8 +145,8 @@ def __init__( def __iter__(self): if self.state is None: raise ValueError( - 'The dataloader\'s state must be set when using the kg based dataloader, ' - 'you should call set_mode() before __iter__()' + '\033[1;31mThe dataloader\'s state must be set when using the kg based dataloader, \033[0m' + '\033[1;31myou should call set_mode() before __iter__()\033[0m' ) if self.state == KGDataLoaderState.KG: return self.kg_dataloader.__iter__() @@ -202,5 +202,5 @@ def set_mode(self, state): state (KGDataLoaderState): the state of :class:`KnowledgeBasedDataLoader`. """ if state not in set(KGDataLoaderState): - raise NotImplementedError(f'Kg data loader has no state named [{self.state}].') + raise NotImplementedError(f'\033[1;31mKg data loader has no state named [{state}].\033[0m') self.state = state diff --git a/recbole/data/dataloader/neg_sample_mixin.py b/recbole/data/dataloader/neg_sample_mixin.py index e21d614ac..7eb983d1c 100644 --- a/recbole/data/dataloader/neg_sample_mixin.py +++ b/recbole/data/dataloader/neg_sample_mixin.py @@ -37,7 +37,7 @@ def __init__( self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False ): if neg_sample_args['strategy'] not in ['by', 'full']: - raise ValueError(f"Neg_sample strategy [{neg_sample_args['strategy']}] has not been implemented.") + raise ValueError(f"\033[1;31mNeg_sample strategy [{neg_sample_args['strategy']}] has not been implemented.\033[0m") self.sampler = sampler self.neg_sample_args = neg_sample_args @@ -52,7 +52,7 @@ def setup(self): def _batch_size_adaptation(self): """Adjust the batch size to ensure that each positive and negative interaction can be in a batch. """ - raise NotImplementedError('Method [batch_size_adaptation] should be implemented.') + raise NotImplementedError('\033[1;31mMethod [batch_size_adaptation] should be implemented.\033[0m') def _neg_sampling(self, inter_feat): """ @@ -62,21 +62,21 @@ def _neg_sampling(self, inter_feat): Returns: The user-item interaction table with negative example. 
""" - raise NotImplementedError('Method [neg_sampling] should be implemented.') + raise NotImplementedError('\033[1;31mMethod [neg_sampling] should be implemented.\033[0m') def get_pos_len_list(self): """ Returns: numpy.ndarray: Number of positive item for each user in a training/evaluating epoch. """ - raise NotImplementedError('Method [get_pos_len_list] should be implemented.') + raise NotImplementedError('\033[1;31mMethod [get_pos_len_list] should be implemented.\033[0m') def get_user_len_list(self): """ Returns: numpy.ndarray: Number of all item for each user in a training/evaluating epoch. """ - raise NotImplementedError('Method [get_user_len_list] should be implemented.') + raise NotImplementedError('\033[1;31mMethod [get_user_len_list] should be implemented.\033[0m') class NegSampleByMixin(NegSampleMixin): @@ -99,7 +99,7 @@ def __init__( self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False ): if neg_sample_args['strategy'] != 'by': - raise ValueError('neg_sample strategy in GeneralInteractionBasedDataLoader() should be `by`') + raise ValueError('\033[1;31mneg_sample strategy in GeneralInteractionBasedDataLoader() should be `by`\033[0m') self.user_inter_in_one_batch = (sampler.phase != 'train') and (config['eval_type'] != EvaluatorType.INDIVIDUAL) self.neg_sample_by = neg_sample_args['by'] @@ -123,7 +123,7 @@ def __init__( neg_item_feat_col = self.neg_prefix + item_feat_col dataset.copy_field_property(neg_item_feat_col, item_feat_col) else: - raise ValueError(f'`neg sampling by` with dl_format [{dl_format}] not been implemented.') + raise ValueError(f'\033[1;31m`neg sampling by` with dl_format [{dl_format}] not been implemented.\033[0m') super().__init__( config, dataset, sampler, neg_sample_args, batch_size=batch_size, dl_format=dl_format, shuffle=shuffle @@ -132,9 +132,9 @@ def __init__( def _neg_sample_by_pair_wise_sampling(self, *args): """Pair-wise sampling. """ - raise NotImplementedError('Method [neg_sample_by_pair_wise_sampling] should be implemented.') + raise NotImplementedError('\033[1;31mMethod [neg_sample_by_pair_wise_sampling] should be implemented.\033[0m') def _neg_sample_by_point_wise_sampling(self, *args): """Point-wise sampling. """ - raise NotImplementedError('Method [neg_sample_by_point_wise_sampling] should be implemented.') + raise NotImplementedError('\033[1;31mMethod [neg_sample_by_point_wise_sampling] should be implemented.\033[0m') diff --git a/recbole/data/dataloader/user_dataloader.py b/recbole/data/dataloader/user_dataloader.py index cd86753aa..8143af21d 100644 --- a/recbole/data/dataloader/user_dataloader.py +++ b/recbole/data/dataloader/user_dataloader.py @@ -47,7 +47,7 @@ def setup(self): """ if self.shuffle is False: self.shuffle = True - self.logger.warning('\033[1;31mUserDataLoader must shuffle the data\033[0m') + self.logger.warning('\033[1;33mUserDataLoader must shuffle the data\033[0m') @property def pr_end(self): diff --git a/recbole/data/dataset/dataset.py b/recbole/data/dataset/dataset.py index 37210434b..43756eeda 100644 --- a/recbole/data/dataset/dataset.py +++ b/recbole/data/dataset/dataset.py @@ -135,7 +135,7 @@ def _get_field_from_config(self): if (self.uid_field is None) ^ (self.iid_field is None): raise ValueError( - 'USER_ID_FIELD and ITEM_ID_FIELD need to be set at the same time or not set at the same time.' 
+ '\033[1;31mUSER_ID_FIELD and ITEM_ID_FIELD need to be set at the same time or not set at the same time.\033[0m' ) self.logger.debug(f'\033[0;34muid_field\033[0m: {self.uid_field}') @@ -211,7 +211,7 @@ def _restore_saved_dataset(self, saved_dataset): self.logger.debug(f'\033[0;32mRestoring dataset from [{saved_dataset}].\033[0m') if (saved_dataset is None) or (not os.path.isdir(saved_dataset)): - raise ValueError(f'Filepath [{saved_dataset}] need to be a dir.') + raise ValueError(f'\033[1;31mFilepath [{saved_dataset}] need to be a dir.\033[0m') with open(os.path.join(saved_dataset, 'basic-info.json')) as file: basic_info = json.load(file) @@ -261,7 +261,7 @@ def _load_inter_feat(self, token, dataset_path): if self.benchmark_filename_list is None: inter_feat_path = os.path.join(dataset_path, f'{token}.inter') if not os.path.isfile(inter_feat_path): - raise ValueError(f'File {inter_feat_path} not exist.') + raise ValueError(f'\033[1;31mFile {inter_feat_path} not exist.\033[0m') inter_feat = self._load_feat(inter_feat_path, FeatureSource.INTERACTION) self.logger.debug(f'Interaction feature loaded successfully from [{inter_feat_path}].') @@ -276,7 +276,7 @@ def _load_inter_feat(self, token, dataset_path): sub_inter_feats.append(temp) sub_inter_lens.append(len(temp)) else: - raise ValueError(f'File {file_path} not exist.') + raise ValueError(f'\033[1;31mFile {file_path} not exist.\033[0m') inter_feat = pd.concat(sub_inter_feats) self.inter_feat, self.file_size_list = inter_feat, sub_inter_lens @@ -306,9 +306,9 @@ def _load_user_or_item_feat(self, token, dataset_path, source, field_name): field = getattr(self, field_name, None) if feat is not None and field is None: - raise ValueError(f'{field_name} must be exist if {source.value}_feat exist.') + raise ValueError(f'\033[1;31m{field_name} must be exist if {source.value}_feat exist.\033[0m') if feat is not None and field not in feat: - raise ValueError(f'{field_name} must be loaded if {source.value}_feat is loaded.') + raise ValueError(f'\033[1;31m{field_name} must be loaded if {source.value}_feat is loaded.\033[0m') if field in self.field2source: self.field2source[field] = FeatureSource(source.value + '_id') @@ -329,12 +329,12 @@ def _load_additional_feat(self, token, dataset_path): return for suf in self.config['additional_feat_suffix']: if hasattr(self, f'{suf}_feat'): - raise ValueError(f'{suf}_feat already exist.') + raise ValueError(f'\033[1;31m{suf}_feat already exist.\033[0m') feat_path = os.path.join(dataset_path, f'{token}.{suf}') if os.path.isfile(feat_path): feat = self._load_feat(feat_path, suf) else: - raise ValueError(f'Additional feature file [{feat_path}] not found.') + raise ValueError(f'\033[1;31mAdditional feature file [{feat_path}] not found.\033[0m') setattr(self, f'{suf}_feat', feat) def _get_load_and_unload_col(self, source): @@ -364,7 +364,7 @@ def _get_load_and_unload_col(self, source): unload_col = None if load_col and unload_col: - raise ValueError(f'load_col [{load_col}] and unload_col [{unload_col}] can not be set the same time.') + raise ValueError(f'\033[1;31mload_col [{load_col}] and unload_col [{unload_col}] can not be set the same time.\033[0m') self.logger.debug(f'\033[0;35m[{source}]: \033[0m') self.logger.debug(f'\t \033[0;34mload_col\033[0m: [{load_col}]') @@ -405,7 +405,7 @@ def _load_feat(self, filepath, source): try: ftype = FeatureType(ftype) except ValueError: - raise ValueError(f'Type {ftype} from field {field} is not supported.') + raise ValueError(f'\033[1;31mType {ftype} from field {field} is not 
supported.\033[0m') if load_col is not None and field not in load_col: continue if unload_col is not None and field in unload_col: @@ -420,7 +420,7 @@ def _load_feat(self, filepath, source): dtype[field_type] = np.float64 if ftype == FeatureType.FLOAT else str if len(columns) == 0: - self.logger.warning(f'No columns has been loaded from [{source}]') + self.logger.warning(f'\033[1;33mNo columns has been loaded from [{source}]\033[0m') return None df = pd.read_csv(filepath, delimiter=self.config['field_separator'], usecols=usecols, dtype=dtype) @@ -465,16 +465,16 @@ def _preload_weight_matrix(self): for preload_id_field in preload_fields: preload_value_field = preload_fields[preload_id_field] if preload_id_field not in self.field2source: - raise ValueError(f'Preload id field [{preload_id_field}] not exist.') + raise ValueError(f'\033[1;31mPreload id field [{preload_id_field}] not exist.\033[0m') if preload_value_field not in self.field2source: - raise ValueError(f'Preload value field [{preload_value_field}] not exist.') + raise ValueError(f'\033[1;31mPreload value field [{preload_value_field}] not exist.\033[0m') pid_source = self.field2source[preload_id_field] pv_source = self.field2source[preload_value_field] if pid_source != pv_source: raise ValueError( - f'Preload id field [{preload_id_field}] is from source [{pid_source}],' - f'while preload value field [{preload_value_field}] is from source [{pv_source}], ' - f'which should be the same.' + f'\033[1;31mPreload id field [{preload_id_field}] is from source [{pid_source}],\033[0m' + f'\033[1;31mwhile preload value field [{preload_value_field}] is from source [{pv_source}], \033[0m' + f'\033[1;31mwhich should be the same.\033[0m' ) for feat_name in self.feat_name_list: feat = getattr(self, feat_name) @@ -482,7 +482,7 @@ def _preload_weight_matrix(self): id_ftype = self.field2type[preload_id_field] if id_ftype != FeatureType.TOKEN: raise ValueError( - f'Preload id field [{preload_id_field}] should be type token, but is [{id_ftype}].' + f'\033[1;31mPreload id field [{preload_id_field}] should be type token, but is [{id_ftype}].\033[0m' ) value_ftype = self.field2type[preload_value_field] token_num = self.num(preload_id_field) @@ -505,8 +505,8 @@ def _preload_weight_matrix(self): matrix[pid] = prow[:max_len] else: self.logger.warning( - f'\033[1;31mField [{preload_value_field}] with type [{value_ftype}] is not `float` or `float_seq`, \033[0m' - f'\033[1;31mwhich will not be handled by preload matrix.\033[0m' + f'\033[1;33mField [{preload_value_field}] with type [{value_ftype}] is not `float` or `float_seq`, \033[0m' + f'\033[1;33mwhich will not be handled by preload matrix.\033[0m' ) continue self._preloaded_weight[preload_id_field] = matrix @@ -545,16 +545,16 @@ def _normalize(self): Only float-like fields can be normalized. 
""" if self.config['normalize_field'] is not None and self.config['normalize_all'] is True: - raise ValueError('Normalize_field and normalize_all can\'t be set at the same time.') + raise ValueError('\033[1;31mNormalize_field and normalize_all can\'t be set at the same time.\033[0m') if self.config['normalize_field']: fields = self.config['normalize_field'] for field in fields: ftype = self.field2type[field] if field not in self.field2type: - raise ValueError(f'Field [{field}] does not exist.') + raise ValueError(f'\033[1;31mField [{field}] does not exist.\033[0m') elif ftype != FeatureType.FLOAT and ftype != FeatureType.FLOAT_SEQ: - self.logger.warning(f'\033[1;31m{field} is not a FLOAT/FLOAT_SEQ feat, which will not be normalized.\033[0m') + self.logger.warning(f'\033[1;33m{field} is not a FLOAT/FLOAT_SEQ feat, which will not be normalized.\033[0m') elif self.config['normalize_all']: fields = self.float_like_fields else: @@ -572,7 +572,7 @@ def _normalize(self): lst = feat[field].values mx, mn = max(lst), min(lst) if mx == mn: - self.logger.warning(f'\033[1;31mAll the same value in [{field}] from [{feat}_feat].\033[0m') + self.logger.warning(f'\033[1;33mAll the same value in [{field}] from [{feat}_feat].\033[0m') feat[field] = 1.0 else: feat[field] = (lst - mn) / (mx - mn) @@ -581,7 +581,7 @@ def _normalize(self): lst = feat[field].agg(np.concatenate) mx, mn = max(lst), min(lst) if mx == mn: - self.logger.warning(f'\033[1;31mAll the same value in [{field}] from [{feat}_feat].\033[0m') + self.logger.warning(f'\033[1;33mAll the same value in [{field}] from [{feat}_feat].\033[0m') lst = 1.0 else: lst = (lst - mn) / (mx - mn) @@ -597,14 +597,14 @@ def _filter_nan_user_or_item(self): dropped_feat = feat.index[feat[field].isnull()] if len(dropped_feat): self.logger.warning( - f'\033[1;31mIn {name}_feat, line {list(dropped_feat + 2)}, {field} do not exist, so they will be removed.\033[0m' + f'\033[1;33mIn {name}_feat, line {list(dropped_feat + 2)}, {field} do not exist, so they will be removed.\033[0m' ) feat.drop(feat.index[dropped_feat], inplace=True) if field is not None: dropped_inter = self.inter_feat.index[self.inter_feat[field].isnull()] if len(dropped_inter): self.logger.warning( - f'\033[1;31mIn inter_feat, line {list(dropped_inter + 2)}, {field} do not exist, so they will be removed.\033[0m' + f'\033[1;33mIn inter_feat, line {list(dropped_inter + 2)}, {field} do not exist, so they will be removed.\033[0m' ) self.inter_feat.drop(self.inter_feat.index[dropped_inter], inplace=True) @@ -629,8 +629,8 @@ def _remove_duplication(self): ) else: self.logger.warning( - f'\033[1;31mTimestamp field has not been loaded or specified, \033[0m' - f'\033[1;31mthus strategy [{keep}] of duplication removal may be meaningless.\033[0m' + f'\033[1;33mTimestamp field has not been loaded or specified, \033[0m' + f'\033[1;33mthus strategy [{keep}] of duplication removal may be meaningless.\033[0m' ) self.inter_feat.drop_duplicates(subset=[self.uid_field, self.iid_field], keep=keep, inplace=True) @@ -744,7 +744,7 @@ def _reset_index(self): for feat_name in self.feat_name_list: feat = getattr(self, feat_name) if feat.empty: - raise ValueError('Some feat is empty, please check the filtering settings.') + raise ValueError('\033[1;31mSome feat is empty, please check the filtering settings.\033[0m') feat.reset_index(drop=True, inplace=True) def _drop_by_value(self, val, cmp): @@ -764,9 +764,9 @@ def _drop_by_value(self, val, cmp): filter_field = [] for field in val: if field not in self.field2type: - raise 
ValueError(f'Field [{field}] not defined in dataset.') + raise ValueError(f'\033[1;31mField [{field}] not defined in dataset.\033[0m') if self.field2type[field] not in {FeatureType.FLOAT, FeatureType.FLOAT_SEQ}: - raise ValueError(f'Field [{field}] is not float-like field in dataset, which can\'t be filter.') + raise ValueError(f'\033[1;31mField [{field}] is not float-like field in dataset, which can\'t be filter.\033[0m') for feat_name in self.feat_name_list: feat = getattr(self, feat_name) if field in feat: @@ -826,14 +826,14 @@ def _set_label_by_threshold(self): self.logger.debug(f'Set label by {threshold}.') if len(threshold) != 1: - raise ValueError('Threshold length should be 1.') + raise ValueError('\033[1;31mThreshold length should be 1.\033[0m') self.set_field_property(self.label_field, FeatureType.FLOAT, FeatureSource.INTERACTION, 1) for field, value in threshold.items(): if field in self.inter_feat: self.inter_feat[self.label_field] = (self.inter_feat[field] >= value).astype(int) else: - raise ValueError(f'Field [{field}] not in inter_feat.') + raise ValueError(f'\033[1;31mField [{field}] not in inter_feat.\033[0m') self._del_col(self.inter_feat, field) def _get_fields_in_same_space(self): @@ -858,14 +858,14 @@ def _get_fields_in_same_space(self): elif count == 1: continue else: - raise ValueError(f'Field [{field}] occurred in `fields_in_same_space` more than one time.') + raise ValueError(f'\033[1;31mField [{field}] occurred in `fields_in_same_space` more than one time.\033[0m') for field_set in fields_in_same_space: if self.uid_field in field_set and self.iid_field in field_set: - raise ValueError('uid_field and iid_field can\'t in the same ID space') + raise ValueError('\033[1;31muid_field and iid_field can\'t in the same ID space\033[0m') for field in field_set: if field not in token_like_fields: - raise ValueError(f'Field [{field}] is not a token-like field.') + raise ValueError(f'\033[1;31mField [{field}] is not a token-like field.\033[0m') fields_in_same_space.extend(additional) return fields_in_same_space @@ -975,7 +975,7 @@ def num(self, field): int: The number of different tokens (``1`` if ``field`` is a float-like field). 
""" if field not in self.field2type: - raise ValueError(f'Field [{field}] not defined in dataset.') + raise ValueError(f'\033[1;31mField [{field}] not defined in dataset.\033[0m') if self.field2type[field] not in {FeatureType.TOKEN, FeatureType.TOKEN_SEQ}: return self.field2seqlen[field] else: @@ -1080,11 +1080,11 @@ def token2id(self, field, tokens): if tokens in self.field2token_id[field]: return self.field2token_id[field][tokens] else: - raise ValueError('token [{}] is not existed') + raise ValueError('\033[1;31mtoken [{}] is not existed\033[0m') elif isinstance(tokens, (list, np.ndarray)): return np.array([self.token2id(field, token) for token in tokens]) else: - raise TypeError('The type of tokens [{}] is not supported') + raise TypeError('\033[1;31mThe type of tokens [{}] is not supported\033[0m') @dlapi.set() def id2token(self, field, ids): @@ -1101,9 +1101,9 @@ def id2token(self, field, ids): return self.field2id_token[field][ids] except IndexError: if isinstance(ids, list): - raise ValueError(f'[{ids}] is not a one-dimensional list.') + raise ValueError(f'\033[1;31m[{ids}] is not a one-dimensional list.\033[0m') else: - raise ValueError(f'[{ids}] is not a valid ids.') + raise ValueError(f'\033[1;31m[{ids}] is not a valid ids.\033[0m') @property @dlapi.set() @@ -1177,7 +1177,7 @@ def _check_field(self, *field_names): """ for field_name in field_names: if getattr(self, field_name, None) is None: - raise ValueError(f'{field_name} isn\'t set.') + raise ValueError(f'\033[1;31m{field_name} isn\'t set.\033[0m') @dlapi.set() def join(self, df): @@ -1247,7 +1247,7 @@ def _drop_unused_col(self): for field in unused_fields: if field not in feat: self.logger.warning( - f'\033[1;31mField [{field}] is not in [{feat_name}_feat], which can not be set in `unused_col`.\033[0m' + f'\033[1;33mField [{field}] is not in [{feat_name}_feat], which can not be set in `unused_col`.\033[0m' ) continue self._del_col(feat, field) @@ -1349,7 +1349,7 @@ def leave_one_out(self, group_by, leave_one_num=1): """ self.logger.debug(f'leave one out, group_by=[{group_by}], leave_one_num=[{leave_one_num}]') if group_by is None: - raise ValueError('leave one out strategy require a group field') + raise ValueError('\033[1;31mleave one out strategy require a group field\033[0m') grouped_inter_feat_index = self._grouped_index(self.inter_feat[group_by].numpy()) next_index = self._split_index_by_leave_one_out(grouped_inter_feat_index, leave_one_num) @@ -1419,7 +1419,7 @@ def save(self, filepath): filepath (str): path of saved dir. 
""" if (filepath is None) or (not os.path.isdir(filepath)): - raise ValueError(f'Filepath [{filepath}] need to be a dir.') + raise ValueError(f'\033[1;31mFilepath [{filepath}] need to be a dir.\033[0m') self.logger.debug(f'Saving into [{filepath}]') basic_info = { @@ -1489,7 +1489,7 @@ def _create_sparse_matrix(self, df_feat, source_field, target_field, form='coo', data = np.ones(len(df_feat)) else: if value_field not in df_feat: - raise ValueError(f'Value_field [{value_field}] should be one of `df_feat`\'s features.') + raise ValueError(f'\033[1;31mValue_field [{value_field}] should be one of `df_feat`\'s features.\033[0m') data = df_feat[value_field] mat = coo_matrix((data, (src, tgt)), shape=(self.num(source_field), self.num(target_field))) @@ -1498,7 +1498,7 @@ def _create_sparse_matrix(self, df_feat, source_field, target_field, form='coo', elif form == 'csr': return mat.tocsr() else: - raise NotImplementedError(f'Sparse matrix format [{form}] has not been implemented.') + raise NotImplementedError(f'\033[1;31mSparse matrix format [{form}] has not been implemented.\033[0m') def _create_graph(self, tensor_feat, source_field, target_field, form='dgl', value_field=None): """Get graph that describe relations between two fields. @@ -1545,7 +1545,7 @@ def _create_graph(self, tensor_feat, source_field, target_field, form='dgl', val graph = Data(edge_index=torch.stack([src, tgt]), edge_attr=edge_attr) return graph else: - raise NotImplementedError(f'Graph format [{form}] has not been implemented.') + raise NotImplementedError(f'\033[1;31mGraph format [{form}] has not been implemented.\033[0m') @dlapi.set() def inter_matrix(self, form='coo', value_field=None): @@ -1565,7 +1565,7 @@ def inter_matrix(self, form='coo', value_field=None): scipy.sparse: Sparse matrix in form ``coo`` or ``csr``. """ if not self.uid_field or not self.iid_field: - raise ValueError('dataset does not exist uid/iid, thus can not converted to sparse matrix.') + raise ValueError('\033[1;31mdataset does not exist uid/iid, thus can not converted to sparse matrix.\033[0m') return self._create_sparse_matrix(self.inter_feat, self.uid_field, self.iid_field, form, value_field) def _history_matrix(self, row, value_field=None): @@ -1598,7 +1598,7 @@ def _history_matrix(self, row, value_field=None): values = np.ones(len(self.inter_feat)) else: if value_field not in self.inter_feat: - raise ValueError(f'Value_field [{value_field}] should be one of `inter_feat`\'s features.') + raise ValueError(f'\033[1;31mValue_field [{value_field}] should be one of `inter_feat`\'s features.\033[0m') values = self.inter_feat[value_field].numpy() if row == 'user': @@ -1615,8 +1615,8 @@ def _history_matrix(self, row, value_field=None): col_num = np.max(history_len) if col_num > max_col_num * 0.2: self.logger.warning( - f'\033[1;31mMax value of {row}\'s history interaction records has reached \033[0m' - f'\033[1;31m{col_num / max_col_num * 100}% of the total.\033[0m' + f'\033[1;33mMax value of {row}\'s history interaction records has reached \033[0m' + f'\033[1;33m{col_num / max_col_num * 100}% of the total.\033[0m' ) history_matrix = np.zeros((row_num, col_num), dtype=np.int64) @@ -1692,7 +1692,7 @@ def get_preload_weight(self, field): numpy.ndarray: preloaded weight matrix. See :doc:`../user_guide/data/data_args` for details. 
""" if field not in self._preloaded_weight: - raise ValueError(f'Field [{field}] not in preload_weight') + raise ValueError(f'\033[1;31mField [{field}] not in preload_weight\033[0m') return self._preloaded_weight[field] def _dataframe_to_interaction(self, data): diff --git a/recbole/data/dataset/kg_dataset.py b/recbole/data/dataset/kg_dataset.py index 71b4c5536..1a8870d26 100644 --- a/recbole/data/dataset/kg_dataset.py +++ b/recbole/data/dataset/kg_dataset.py @@ -141,7 +141,7 @@ def _load_kg(self, token, dataset_path): self.logger.debug(f'\033[0;32mLoading kg from [{dataset_path}].\033[0m') kg_path = os.path.join(dataset_path, f'{token}.kg') if not os.path.isfile(kg_path): - raise ValueError(f'[{token}.kg] not found in [{dataset_path}].') + raise ValueError(f'\033[1;31m[{token}.kg] not found in [{dataset_path}].\033[0m') df = self._load_feat(kg_path, FeatureSource.KG) self._check_kg(df) return df @@ -156,7 +156,7 @@ def _load_link(self, token, dataset_path): self.logger.debug(f'\033[0;32mLoading link from [{dataset_path}].\033[0m') link_path = os.path.join(dataset_path, f'{token}.link') if not os.path.isfile(link_path): - raise ValueError(f'[{token}.link] not found in [{dataset_path}].') + raise ValueError(f'\033[1;31m[{token}.link] not found in [{dataset_path}].\033[0m') df = self._load_feat(link_path, 'link') self._check_link(df) @@ -442,7 +442,7 @@ def kg_graph(self, form='coo', value_field=None): elif form in ['dgl', 'pyg']: return self._create_graph(*args) else: - raise NotImplementedError('kg graph format [{}] has not been implemented.') + raise NotImplementedError('\033[1;31mkg graph format [{}] has not been implemented.\033[0m') def _create_ckg_sparse_matrix(self, form='coo', show_relation=False): user_num = self.user_num @@ -473,7 +473,7 @@ def _create_ckg_sparse_matrix(self, form='coo', show_relation=False): elif form == 'csr': return mat.tocsr() else: - raise NotImplementedError(f'Sparse matrix format [{form}] has not been implemented.') + raise NotImplementedError(f'\033[1;31mSparse matrix format [{form}] has not been implemented.\033[0m') def _create_ckg_graph(self, form='dgl', show_relation=False): user_num = self.user_num @@ -510,7 +510,7 @@ def _create_ckg_graph(self, form='dgl', show_relation=False): graph = Data(edge_index=torch.stack([src, tgt]), edge_attr=edge_attr) return graph else: - raise NotImplementedError(f'Graph format [{form}] has not been implemented.') + raise NotImplementedError(f'Graph format [{form}] has not been implemented.\033[0m') @dlapi.set() def ckg_graph(self, form='coo', value_field=None): @@ -542,7 +542,7 @@ def ckg_graph(self, form='coo', value_field=None): https://github.com/rusty1s/pytorch_geometric """ if value_field is not None and value_field != self.relation_field: - raise ValueError(f'Value_field [{value_field}] can only be [{self.relation_field}] in ckg_graph.') + raise ValueError(f'\033[1;31mValue_field [{value_field}] can only be [{self.relation_field}] in ckg_graph.\033[0m') show_relation = value_field is not None if form in ['coo', 'csr']: @@ -550,4 +550,4 @@ def ckg_graph(self, form='coo', value_field=None): elif form in ['dgl', 'pyg']: return self._create_ckg_graph(form, show_relation) else: - raise NotImplementedError('ckg graph format [{}] has not been implemented.') + raise NotImplementedError('\033[1;31mckg graph format [{}] has not been implemented.\033[0m') diff --git a/recbole/data/dataset/sequential_dataset.py b/recbole/data/dataset/sequential_dataset.py index 02e4842be..43cc175c9 100644 --- 
a/recbole/data/dataset/sequential_dataset.py +++ b/recbole/data/dataset/sequential_dataset.py @@ -89,9 +89,9 @@ def prepare_data_augmentation(self): def leave_one_out(self, group_by, leave_one_num=1): self.logger.debug(f'Leave one out, group_by=[{group_by}], leave_one_num=[{leave_one_num}].') if group_by is None: - raise ValueError('Leave one out strategy require a group field.') + raise ValueError('\033[1;31mLeave one out strategy requires a group field.\033[0m') if group_by != self.uid_field: - raise ValueError('Sequential models require group by user.') + raise ValueError('\033[1;31mSequential models require group by user.\033[0m') self.prepare_data_augmentation() grouped_index = self._grouped_index(self.uid_list) @@ -121,10 +121,10 @@ def inter_matrix(self, form='coo', value_field=None): scipy.sparse: Sparse matrix in form ``coo`` or ``csr``. """ if not self.uid_field or not self.iid_field: - raise ValueError('dataset does not exist uid/iid, thus can not converted to sparse matrix.') + raise ValueError('\033[1;31mdataset does not have uid/iid, thus can not be converted to sparse matrix.\033[0m') - self.logger.warning('\033[1;31mLoad interaction matrix may lead to label leakage from testing phase, this implementation \033[0m' '\033[1;31monly provides the interactions corresponding to specific phase\033[0m') + self.logger.warning('\033[1;33mLoading the interaction matrix may lead to label leakage from the testing phase; this implementation \033[0m' '\033[1;33monly provides the interactions corresponding to the current phase\033[0m') local_inter_feat = self.inter_feat[self.uid_list] return self._create_sparse_matrix(local_inter_feat, self.uid_field, self.iid_field, form, value_field) @@ -133,12 +133,12 @@ def build(self, eval_setting): ordering_args = eval_setting.ordering_args if ordering_args['strategy'] == 'shuffle': - raise ValueError('Ordering strategy `shuffle` is not supported in sequential models.') + raise ValueError('\033[1;31mOrdering strategy `shuffle` is not supported in sequential models.\033[0m') elif ordering_args['strategy'] == 'by': if ordering_args['field'] != self.time_field: - raise ValueError('Sequential models require `TO` (time ordering) strategy.') + raise ValueError('\033[1;31mSequential models require `TO` (time ordering) strategy.\033[0m') if ordering_args['ascending'] is not True: - raise ValueError('Sequential models require `time_field` to sort in ascending order.') + raise ValueError('\033[1;31mSequential models require `time_field` to sort in ascending order.\033[0m') group_field = eval_setting.group_field @@ -146,4 +146,4 @@ if split_args['strategy'] == 'loo': return self.leave_one_out(group_by=group_field, leave_one_num=split_args['leave_one_num']) else: - ValueError('Sequential models require `loo` (leave one out) split strategy.') + raise ValueError('\033[1;31mSequential models require `loo` (leave one out) split strategy.\033[0m') diff --git a/recbole/data/dataset/social_dataset.py b/recbole/data/dataset/social_dataset.py index 281030328..09cb960e1 100644 --- a/recbole/data/dataset/social_dataset.py +++ b/recbole/data/dataset/social_dataset.py @@ -65,10 +65,10 @@ def _load_net(self, dataset_name, dataset_path): if os.path.isfile(net_file_path): net_feat = self._load_feat(net_file_path, FeatureSource.NET) if net_feat is None: - raise ValueError('.net file exist, but net_feat is None, please check your load_col') + raise ValueError('\033[1;31m.net file exists, but net_feat is None, please check your load_col\033[0m') return net_feat else: - 
raise ValueError(f'File {net_file_path} not exist.') + raise ValueError(f'\033[1;31mFile {net_file_path} does not exist.\033[0m') def _get_fields_in_same_space(self): """Parsing ``config['fields_in_same_space']``. See :doc:`../user_guide/data/data_args` for detail arg setting. @@ -120,7 +120,7 @@ def net_graph(self, form='coo', value_field=None): elif form in ['dgl', 'pyg']: return self._create_graph(*args) else: - raise NotImplementedError('net graph format [{}] has not been implemented.') + raise NotImplementedError(f'\033[1;31mnet graph format [{form}] has not been implemented.\033[0m') def __str__(self): info = [super().__str__(), f'The number of connections of social network: {len(self.net_feat)}'] diff --git a/recbole/data/interaction.py b/recbole/data/interaction.py index 218b8b035..bd29f354d 100644 --- a/recbole/data/interaction.py +++ b/recbole/data/interaction.py @@ -86,7 +86,7 @@ def __init__(self, interaction, pos_len_list=None, user_len_list=None): self.set_additional_info(pos_len_list, user_len_list) for k in self.interaction: if not isinstance(self.interaction[k], torch.Tensor): - raise ValueError(f'Interaction [{interaction}] should only contains torch.Tensor') + raise ValueError(f'\033[1;31mInteraction [{interaction}] should only contain torch.Tensor\033[0m') self.length = -1 for k in self.interaction: self.length = max(self.length, self.interaction[k].shape[0]) @@ -95,7 +95,7 @@ def set_additional_info(self, pos_len_list=None, user_len_list=None): self.pos_len_list = pos_len_list self.user_len_list = user_len_list if (self.pos_len_list is None) ^ (self.user_len_list is None): - raise ValueError('pos_len_list and user_len_list should be both None or valued.') + raise ValueError('\033[1;31mpos_len_list and user_len_list should be both None or valued.\033[0m') def __iter__(self): return self.interaction.__iter__() @@ -253,7 +253,7 @@ def drop(self, column): column (str): the column to be dropped. 
""" if column not in self.interaction: - raise ValueError(f'Column [{column}] is not in [{self}].') + raise ValueError(f'\033[1;31mColumn [{column}] is not in [{self}].\033[0m') del self.interaction[column] def _reindex(self, index): @@ -285,29 +285,29 @@ def sort(self, by, ascending=True): """ if isinstance(by, str): if by not in self.interaction: - raise ValueError(f'[{by}] is not exist in interaction [{self}].') + raise ValueError(f'\033[1;31m[{by}] is not exist in interaction [{self}].\033[0m') by = [by] elif isinstance(by, (list, tuple)): for b in by: if b not in self.interaction: - raise ValueError(f'[{b}] is not exist in interaction [{self}].') + raise ValueError(f'\033[1;31m[{b}] is not exist in interaction [{self}].\033[0m') else: - raise TypeError(f'Wrong type of by [{by}].') + raise TypeError(f'\033[1;31mWrong type of by [{by}].\033[0m') if isinstance(ascending, bool): ascending = [ascending] elif isinstance(ascending, (list, tuple)): for a in ascending: if not isinstance(a, bool): - raise TypeError(f'Wrong type of ascending [{ascending}].') + raise TypeError(f'\033[1;31mWrong type of ascending [{ascending}].\033[0m') else: - raise TypeError(f'Wrong type of ascending [{ascending}].') + raise TypeError(f'\033[1;31mWrong type of ascending [{ascending}].\033[0m') if len(by) != len(ascending): if len(ascending) == 1: ascending = ascending * len(by) else: - raise ValueError(f'by [{by}] and ascending [{ascending}] should have same length.') + raise ValueError(f'\033[1;31mby [{by}] and ascending [{ascending}] should have same length.\033[0m') for b, a in zip(by[::-1], ascending[::-1]): index = np.argsort(self.interaction[b], kind='stable') @@ -334,14 +334,14 @@ def cat_interactions(interactions): :class:`Interaction`: Concatenated interaction. """ if not isinstance(interactions, (list, tuple)): - raise TypeError(f'Interactions [{interactions}] should be list or tuple.') + raise TypeError(f'\033[1;31mInteractions [{interactions}] should be list or tuple.\033[0m') if len(interactions) == 0: - raise ValueError(f'Interactions [{interactions}] should have some interactions.') + raise ValueError(f'\033[1;31mInteractions [{interactions}] should have some interactions.\033[0m') columns_set = set(interactions[0].columns) for inter in interactions: if columns_set != set(inter.columns): - raise ValueError(f'Interactions [{interactions}] should have some interactions.') + raise ValueError(f'\033[1;31mInteractions [{interactions}] should have some interactions.\033[0m') new_inter = {col: torch.cat([inter[col] for inter in interactions]) for col in columns_set} return Interaction(new_inter) diff --git a/recbole/data/utils.py b/recbole/data/utils.py index 2c2c5af6c..794974604 100644 --- a/recbole/data/utils.py +++ b/recbole/data/utils.py @@ -24,10 +24,8 @@ def create_dataset(config): """Create dataset according to :attr:`config['model']` and :attr:`config['MODEL_TYPE']`. - Args: config (Config): An instance object of Config, used to record parameter information. - Returns: Dataset: Constructed dataset. """ @@ -56,13 +54,11 @@ def create_dataset(config): def data_preparation(config, dataset, save=False): """Split the dataset by :attr:`config['eval_setting']` and call :func:`dataloader_construct` to create corresponding dataloader. - Args: config (Config): An instance object of Config, used to record parameter information. dataset (Dataset): An instance object of Dataset, which contains all interaction records. 
save (bool, optional): If ``True``, it will call :func:`save_datasets` to save split dataset. Defaults to ``False``. - Returns: tuple: - train_data (AbstractDataLoader): The dataloader for training. @@ -77,131 +73,87 @@ def data_preparation(config, dataset, save=False): train_dataset, valid_dataset, test_dataset = built_datasets phases = ['train', 'valid', 'test'] sampler = None + logger = getLogger() + train_neg_sample_args = config['train_neg_sample_args'] + eval_neg_sample_args = es.neg_sample_args if save: save_datasets(config['checkpoint_dir'], name=phases, dataset=built_datasets) - kwargs = {} - if config['training_neg_sample_num']: + # Training + train_kwargs = { + 'config': config, + 'dataset': train_dataset, + 'batch_size': config['train_batch_size'], + 'dl_format': config['MODEL_INPUT_TYPE'], + 'shuffle': True, + } + if train_neg_sample_args['strategy'] != 'none': if dataset.label_field in dataset.inter_feat: raise ValueError( - f'`training_neg_sample_num` should be 0 ' - f'if inter_feat have label_field [{dataset.label_field}].' + f'\033[1;31m`training_neg_sample_num` should be 0 \033[0m' + f'\033[1;31mif inter_feat have label_field [{dataset.label_field}].\033[0m' ) if model_type != ModelType.SEQUENTIAL: - sampler = Sampler(phases, built_datasets, config['train_neg_sample_args']['distribution']) + sampler = Sampler(phases, built_datasets, train_neg_sample_args['distribution']) else: - sampler = RepeatableSampler(phases, dataset, config['train_neg_sample_args']['distribution']) - kwargs['sampler'] = sampler.set_phase('train') - kwargs['neg_sample_args'] = copy.deepcopy(config['train_neg_sample_args']) + sampler = RepeatableSampler(phases, dataset, train_neg_sample_args['distribution']) + train_kwargs['sampler'] = sampler.set_phase('train') + train_kwargs['neg_sample_args'] = train_neg_sample_args if model_type == ModelType.KNOWLEDGE: - kg_sampler = KGSampler(dataset, config['train_neg_sample_args']['distribution']) - kwargs['kg_sampler'] = kg_sampler - train_data = dataloader_construct( - name='train', - config=config, - eval_setting=es, - dataset=train_dataset, - dl_format=config['MODEL_INPUT_TYPE'], - batch_size=config['train_batch_size'], - shuffle=True, - **kwargs - ) - - kwargs = {} - if es.neg_sample_args['strategy'] != 'none': + kg_sampler = KGSampler(dataset, train_neg_sample_args['distribution']) + train_kwargs['kg_sampler'] = kg_sampler + + dataloader = get_data_loader('train', config, train_neg_sample_args) + logger.info(f'\033[1;35mBuild\033[0m \033[1;33m[{dataloader.__name__}]\033[0m for \033[1;33m[train]\033[0m with format \033[1;33m[{train_kwargs["dl_format"]}]\033[0m') + if train_neg_sample_args['strategy'] != 'none': + logger.info(f'\033[1;35m[train]\033[0m \033[1;34mNegative Sampling\033[0m: {train_neg_sample_args}') + else: + logger.info(f'\033[1;33m[train] No Negative Sampling\033[0m') + logger.info(f'\033[1;35m[train]\033[0m \033[1;36mbatch_size\033[0m = \033[0;33m[{train_kwargs["batch_size"]}]\033[0m, \033[1;36mshuffle\033[0m = \033[0;33m[{train_kwargs["shuffle"]}]\033[0m\n') + train_data = dataloader(**train_kwargs) + + # Evaluation + eval_kwargs = { + 'config': config, + 'batch_size': config['eval_batch_size'], + 'dl_format': InputType.POINTWISE, + 'shuffle': False, + } + valid_kwargs = {'dataset': valid_dataset} + test_kwargs = {'dataset': test_dataset} + if eval_neg_sample_args['strategy'] != 'none': if dataset.label_field in dataset.inter_feat: raise ValueError( - f'It can not validate with `{es.es_str[1]}` ' - f'when inter_feat have label_field 
[{dataset.label_field}].' + f'\033[1;31mIt can not validate with `{es.es_str[1]}` \033[0m' + f'\033[1;31mwhen inter_feat have label_field [{dataset.label_field}].\033[0m' ) if sampler is None: if model_type != ModelType.SEQUENTIAL: - sampler = Sampler(phases, built_datasets, es.neg_sample_args['distribution']) + sampler = Sampler(phases, built_datasets, eval_neg_sample_args['distribution']) else: - sampler = RepeatableSampler(phases, dataset, es.neg_sample_args['distribution']) - sampler.set_distribution(es.neg_sample_args['distribution']) - kwargs['sampler'] = [sampler.set_phase('valid'), sampler.set_phase('test')] - kwargs['neg_sample_args'] = copy.deepcopy(es.neg_sample_args) - valid_data, test_data = dataloader_construct( - name='evaluation', - config=config, - eval_setting=es, - dataset=[valid_dataset, test_dataset], - batch_size=config['eval_batch_size'], - **kwargs - ) - - return train_data, valid_data, test_data - - -def dataloader_construct( - name, config, eval_setting, dataset, dl_format=InputType.POINTWISE, batch_size=1, shuffle=False, **kwargs -): - """Get a correct dataloader class by calling :func:`get_data_loader` to construct dataloader. - - Args: - name (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'. - config (Config): An instance object of Config, used to record parameter information. - eval_setting (EvalSetting): An instance object of EvalSetting, used to record evaluation settings. - dataset (Dataset or list of Dataset): The split dataset for constructing dataloader. - dl_format (InputType, optional): The input type of dataloader. Defaults to - :obj:`~recbole.utils.enum_type.InputType.POINTWISE`. - batch_size (int, optional): The batch_size of dataloader. Defaults to ``1``. - shuffle (bool, optional): Whether the dataloader will be shuffle after a round. Defaults to ``False``. - **kwargs: Other input args of dataloader, such as :attr:`sampler`, :attr:`kg_sampler` - and :attr:`neg_sample_args`. The meaning of these args is the same as these args in some dataloaders. - - Returns: - AbstractDataLoader or list of AbstractDataLoader: Constructed dataloader in split dataset. 
- """ - if not isinstance(dataset, list): - dataset = [dataset] - - if not isinstance(batch_size, list): - batch_size = [batch_size] * len(dataset) + sampler = RepeatableSampler(phases, dataset, eval_neg_sample_args['distribution']) + else: + sampler.set_distribution(eval_neg_sample_args['distribution']) + eval_kwargs['neg_sample_args'] = eval_neg_sample_args + valid_kwargs['sampler'] = sampler.set_phase('valid') + test_kwargs['sampler'] = sampler.set_phase('test') + valid_kwargs.update(eval_kwargs) + test_kwargs.update(eval_kwargs) - if len(dataset) != len(batch_size): - raise ValueError(f'Dataset {dataset} and batch_size {batch_size} should have the same length.') + dataloader = get_data_loader('evaluation', config, eval_neg_sample_args) + logger.info(f'\033[1;35mBuild\033[0m \033[1;33m[{dataloader.__name__}]\033[0m for \033[1;33m[evaluation]\033[0m with format \033[1;33m[{eval_kwargs["dl_format"]}]\033[0m') + logger.info(es) + logger.info(f'\033[1;35m[evaluation]\033[0m \033[1;36mbatch_size\033[0m = \033[1;33m[{eval_kwargs["batch_size"]}]\033[0m, \033[1;36mshuffle\033[0m = \033[1;33m[{eval_kwargs["shuffle"]}]\033[0m\n') - kwargs_list = [{} for _ in range(len(dataset))] - for key, value in kwargs.items(): - key = [key] * len(dataset) - if not isinstance(value, list): - value = [value] * len(dataset) - if len(dataset) != len(value): - raise ValueError(f'Dataset {dataset} and {key} {value} should have the same length.') - for kw, k, w in zip(kwargs_list, key, value): - kw[k] = w + valid_data = dataloader(**valid_kwargs) + test_data = dataloader(**test_kwargs) - model_type = config['MODEL_TYPE'] - logger = getLogger() - logger.info(f'\033[1;35mBuild\033[0m \033[1;33m[{model_type}]\033[0m DataLoader for \033[1;33m[{name}]\033[0m with format \033[1;33m[{dl_format}]\033[0m') - logger.info(eval_setting) - logger.info(f'\033[0;36mbatch_size\033[0m = \033[0;33m[{batch_size}]\033[0m, \033[0;36mshuffle\033[0m = \033[0;33m[{shuffle}]\033[0m\n') - - if 'neg_sample_args' in kwargs: - dataloader = get_data_loader(name, config, kwargs['neg_sample_args']) - else: - dataloader = get_data_loader(name, config, eval_setting.neg_sample_args) - - try: - ret = [ - dataloader(config=config, dataset=ds, batch_size=bs, dl_format=dl_format, shuffle=shuffle, **kw) - for ds, bs, kw in zip(dataset, batch_size, kwargs_list) - ] - except TypeError: - raise ValueError('training_neg_sample_num should be 0') - - if len(ret) == 1: - return ret[0] - else: - return ret + return train_data, valid_data, test_data def save_datasets(save_path, name, dataset): """Save split datasets. - Args: save_path (str): The path of directory for saving. name (str or list of str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'. @@ -211,7 +163,7 @@ def save_datasets(save_path, name, dataset): name = [name] dataset = [dataset] if len(name) != len(dataset): - raise ValueError(f'Length of name {name} should equal to length of dataset {dataset}.') + raise ValueError(f'\033[1;31mLength of name {name} should equal to length of dataset {dataset}.\033[0m') for i, d in enumerate(dataset): cur_path = os.path.join(save_path, name[i]) ensure_dir(cur_path) @@ -220,12 +172,10 @@ def save_datasets(save_path, name, dataset): def get_data_loader(name, config, neg_sample_args): """Return a dataloader class according to :attr:`config` and :attr:`eval_setting`. - Args: name (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'. 
config (Config): An instance object of Config, used to record parameter information. neg_sample_args (dict) : Settings of negative sampling. - Returns: type: The dataloader class that meets the requirements in :attr:`config` and :attr:`eval_setting`. """ @@ -242,39 +192,25 @@ def get_data_loader(name, config, neg_sample_args): if config['model'] in register_table: return register_table[config['model']](name, config, neg_sample_args) + model_type_table = { + ModelType.GENERAL: 'General', + ModelType.TRADITIONAL: 'General', + ModelType.CONTEXT: 'Context', + ModelType.SEQUENTIAL: 'Sequential', + ModelType.DECISIONTREE: 'DecisionTree', + } + neg_sample_strategy_table = { + 'none': 'DataLoader', + 'by': 'NegSampleDataLoader', + 'full': 'FullDataLoader', + } model_type = config['MODEL_TYPE'] - if name == 'train' and config['train_neg_sample_args'] != None: - neg_sample_strategy = config['train_neg_sample_args']['strategy'] - else: - neg_sample_strategy = neg_sample_args['strategy'] - if model_type == ModelType.GENERAL or model_type == ModelType.TRADITIONAL: - if neg_sample_strategy == 'none': - return GeneralDataLoader - elif neg_sample_strategy == 'by': - return GeneralNegSampleDataLoader - elif neg_sample_strategy == 'full': - return GeneralFullDataLoader - elif model_type == ModelType.CONTEXT: - if neg_sample_strategy == 'none': - return ContextDataLoader - elif neg_sample_strategy == 'by': - return ContextNegSampleDataLoader - elif neg_sample_strategy == 'full': - return ContextFullDataLoader - elif model_type == ModelType.SEQUENTIAL: - if neg_sample_strategy == 'none': - return SequentialDataLoader - elif neg_sample_strategy == 'by': - return SequentialNegSampleDataLoader - elif neg_sample_strategy == 'full': - return SequentialFullDataLoader - elif model_type == ModelType.DECISIONTREE: - if neg_sample_strategy == 'none': - return DecisionTreeDataLoader - elif neg_sample_strategy == 'by': - return DecisionTreeNegSampleDataLoader - elif neg_sample_strategy == 'full': - return DecisionTreeFullDataLoader + neg_sample_strategy = neg_sample_args['strategy'] + dataloader_module = importlib.import_module('recbole.data.dataloader') + + if model_type in model_type_table and neg_sample_strategy in neg_sample_strategy_table: + dataloader_name = model_type_table[model_type] + neg_sample_strategy_table[neg_sample_strategy] + return getattr(dataloader_module, dataloader_name) elif model_type == ModelType.KNOWLEDGE: if neg_sample_strategy == 'by': if name == 'train': @@ -284,23 +220,19 @@ def get_data_loader(name, config, neg_sample_args): elif neg_sample_strategy == 'full': return GeneralFullDataLoader elif neg_sample_strategy == 'none': - # return GeneralDataLoader - # TODO 训练也可以为none? 看general的逻辑似乎是都可以为None raise NotImplementedError( - 'The use of external negative sampling for knowledge model has not been implemented' + '\033[1;31mThe use of external negative sampling for knowledge model has not been implemented\033[0m' ) else: - raise NotImplementedError(f'Model_type [{model_type}] has not been implemented.') + raise NotImplementedError(f'\033[1;31mModel_type [{model_type}] has not been implemented.\033[0m') def _get_DIN_data_loader(name, config, neg_sample_args): """Customized function for DIN to get correct dataloader class. - Args: name (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'. config (Config): An instance object of Config, used to record parameter information. neg_sample_args : Settings of negative sampling. 
- Returns: type: The dataloader class that meets the requirements in :attr:`config` and :attr:`eval_setting`. """ @@ -315,12 +247,10 @@ def _get_AE_data_loader(name, config, neg_sample_args): """Customized function for Multi-DAE and Multi-VAE to get correct dataloader class. - Args: name (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'. config (Config): An instance object of Config, used to record parameter information. neg_sample_args (dict): Settings of negative sampling. - Returns: type: The dataloader class that meets the requirements in :attr:`config` and :attr:`eval_setting`. """ @@ -338,16 +268,12 @@ class DLFriendlyAPI(object): """A Decorator class, which helps copying :class:`Dataset` methods to :class:`DataLoader`. - These methods are called *DataLoader Friendly APIs*. - E.g. if ``train_data`` is an object of :class:`DataLoader`, and :meth:`~recbole.data.dataset.dataset.Dataset.num` is a method of :class:`~recbole.data.dataset.dataset.Dataset`, Cause it has been decorated, :meth:`~recbole.data.dataset.dataset.Dataset.num` can be called directly by ``train_data``. - See the example of :meth:`set` for details. - Attributes: dataloader_apis (set): Register table that saves all the method names of DataLoader Friendly APIs. """ @@ -362,9 +288,7 @@ def set(self): """ Example: .. code:: python - from recbole.data.utils import dlapi - @dlapi.set() def dataset_meth(): ... @@ -377,4 +301,4 @@ def decorator(f): return decorator dlapi = DLFriendlyAPI() diff --git a/recbole/evaluator/abstract_evaluator.py b/recbole/evaluator/abstract_evaluator.py index 45254c9a6..88f4ad2e3 100644 --- a/recbole/evaluator/abstract_evaluator.py +++ b/recbole/evaluator/abstract_evaluator.py @@ -114,7 +114,7 @@ def full_sort_collect(self, true_scores, pred_scores): """it is called when evaluation sample distribution is `full`. 
""" - raise NotImplementedError('full sort can\'t use IndividualEvaluator') + raise NotImplementedError('\033[1;31mfull sort can\'t use IndividualEvaluator\033[0m') def get_score_matrix(self, true_scores, pred_scores): """get score matrix @@ -133,4 +133,4 @@ def get_score_matrix(self, true_scores, pred_scores): def _check_args(self): if self.full: - raise NotImplementedError('full sort can\'t use IndividualEvaluator') + raise NotImplementedError('\033[1;31mfull sort can\'t use IndividualEvaluator\033[0m') diff --git a/recbole/evaluator/evaluators.py b/recbole/evaluator/evaluators.py index 8b0d97238..56f7f1ec1 100644 --- a/recbole/evaluator/evaluators.py +++ b/recbole/evaluator/evaluators.py @@ -113,11 +113,11 @@ def _check_args(self): for topk in self.topk: if topk <= 0: raise ValueError( - 'topk must be a positive integer or a list of positive integers, ' - 'but get `{}`'.format(topk) + '\033[1;31mtopk must be a positive integer or a list of positive integers, \033[0m' + '\033[1;31mbut get `{}`\033[0m'.format(topk) ) else: - raise TypeError('The topk must be a integer, list') + raise TypeError('\033[1;31mThe topk must be a integer, list\033[0m') def _calculate_metrics(self, pos_len_list, topk_idx, shapes): """integrate the results of each batch and evaluate the topk metrics by users diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index ed38891f8..4c18798de 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -27,15 +27,11 @@ def hit_(pos_index, pos_len): r"""Hit_ (also known as hit ratio at :math:`N`) is a way of calculating how many 'hits' you have in an n-sized list of ranked items. - .. _Hit: https://medium.com/@rishabhbhatia315/recommendation-system-evaluation-metrics-3f6739288870 - .. math:: \mathrm {HR@K} =\frac{Number \space of \space Hits @K}{|GT|} - :math:`HR` is the number of users with a positive sample in the recommendation list. :math:`GT` is the total number of samples in the test set. - """ result = np.cumsum(pos_index, axis=1) return (result > 0).astype(int) @@ -44,15 +40,11 @@ def hit_(pos_index, pos_len): def mrr_(pos_index, pos_len): r"""The MRR_ (also known as mean reciprocal rank) is a statistic measure for evaluating any process that produces a list of possible responses to a sample of queries, ordered by probability of correctness. - .. _MRR: https://en.wikipedia.org/wiki/Mean_reciprocal_rank - .. math:: \mathrm {MRR} = \frac{1}{|{U}|} \sum_{i=1}^{|{U}|} \frac{1}{rank_i} - :math:`U` is the number of users, :math:`rank_i` is the rank of the first item in the recommendation list in the test set results for user :math:`i`. - """ idxs = pos_index.argmax(axis=1) result = np.zeros_like(pos_index, dtype=np.float) @@ -66,19 +58,15 @@ def mrr_(pos_index, pos_len): def map_(pos_index, pos_len): r"""MAP_ (also known as Mean Average Precision) The MAP is meant to calculate Avg. Precision for the relevant items. - Note: In this case the normalization factor used is :math:`\frac{1}{\min (m,N)}`, which prevents your AP score from being unfairly suppressed when your number of recommendations couldn't possibly capture all the correct ones. - .. _map: http://sdsawtelle.github.io/blog/output/mean-average-precision-MAP-for-recommender-systems.html#MAP-for-Recommender-Algorithms - .. 
math:: \begin{align*} \mathrm{AP@N} &= \frac{1}{\mathrm{min}(m,N)}\sum_{k=1}^N P(k) \cdot rel(k) \\ \mathrm{MAP@N}& = \frac{1}{|U|}\sum_{u=1}^{|U|}(\mathrm{AP@N})_u \end{align*} - """ pre = precision_(pos_index, pos_len) sum_pre = np.cumsum(pre * pos_index.astype(np.float), axis=1) @@ -95,16 +83,12 @@ def map_(pos_index, pos_len): def recall_(pos_index, pos_len): r"""Recall_ (also known as sensitivity) is the fraction of the total amount of relevant instances that were actually retrieved - .. _recall: https://en.wikipedia.org/wiki/Precision_and_recall#Recall - .. math:: \mathrm {Recall@K} = \frac{|Rel_u\cap Rec_u|}{Rel_u} - :math:`Rel_u` is the set of items relevant to user :math:`U`, :math:`Rec_u` is the top K items recommended to users. We obtain the result by calculating the average :math:`Recall@K` of each user. - """ return np.cumsum(pos_index, axis=1) / pos_len.reshape(-1, 1) @@ -112,9 +96,7 @@ def recall_(pos_index, pos_len): def ndcg_(pos_index, pos_len): r"""NDCG_ (also known as normalized discounted cumulative gain) is a measure of ranking quality. Through normalizing the score, users and their recommendation list results in the whole test set can be evaluated. - .. _NDCG: https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG - .. math:: \begin{gather} \mathrm {DCG@K}=\sum_{i=1}^{K} \frac{2^{rel_i}-1}{\log_{2}{(i+1)}}\\ @@ -122,12 +104,10 @@ def ndcg_(pos_index, pos_len): \mathrm {NDCG_u@K}=\frac{DCG_u@K}{IDCG_u@K}\\ \mathrm {NDCG@K}=\frac{\sum \nolimits_{u \in U^{te}NDCG_u@K}}{|U^{te}|} \end{gather} - :math:`K` stands for recommending :math:`K` items. And the :math:`rel_i` is the relevance of the item in position :math:`i` in the recommendation list. :math:`{rel_i}` equals to 1 if the item is ground truth otherwise 0. :math:`U^{te}` stands for all users in the test set. - """ len_rank = np.full_like(pos_len, pos_index.shape[1]) idcg_len = np.where(pos_len > len_rank, len_rank, pos_len) @@ -150,16 +130,12 @@ def ndcg_(pos_index, pos_len): def precision_(pos_index, pos_len): r"""Precision_ (also called positive predictive value) is the fraction of relevant instances among the retrieved instances - .. _precision: https://en.wikipedia.org/wiki/Precision_and_recall#Precision - .. math:: \mathrm {Precision@K} = \frac{|Rel_u \cap Rec_u|}{Rec_u} - :math:`Rel_u` is the set of items relevant to user :math:`U`, :math:`Rec_u` is the top K items recommended to users. We obtain the result by calculating the average :math:`Precision@K` of each user. - """ return pos_index.cumsum(axis=1) / np.arange(1, pos_index.shape[1] + 1) @@ -167,24 +143,19 @@ def precision_(pos_index, pos_len): def gauc_(user_len_list, pos_len_list, pos_rank_sum): r"""GAUC_ (also known as Group Area Under Curve) is used to evaluate the two-class model, referring to the area under the ROC curve grouped by user. - .. _GAUC: https://dl.acm.org/doi/10.1145/3219819.3219823 - Note: It calculates the AUC score of each user, and finally obtains GAUC by weighting the user AUC. It is also not limited to k. Due to our padding for `scores_tensor` in `RankEvaluator` with `-np.inf`, the padding value will influence the ranks of origin items. Therefore, we use descending sort here and make an identity transformation to the formula of `AUC`, which is shown in `auc_` function. For readability, we didn't do simplification in the code. - .. math:: \mathrm {GAUC} = \frac {{{M} \times {(M+N+1)} - \frac{M \times (M+1)}{2}} - \sum\limits_{i=1}^M rank_{i}} {{M} \times {N}} - :math:`M` is the number of positive samples. 
:math:`N` is the number of negative samples. :math:`rank_i` is the descending rank of the ith positive sample. - """ neg_len_list = user_len_list - pos_len_list @@ -195,17 +166,17 @@ def gauc_(user_len_list, pos_len_list, pos_rank_sum): if any_without_pos: logger = getLogger() logger.warning( - "\033[1;31mNo positive samples in some users, \033[0m" - "\033[1;31mtrue positive value should be meaningless, \033[0m" - "\033[1;31mthese users have been removed from GAUC calculation\033[0m" + "\033[1;33mNo positive samples in some users, \033[0m" + "\033[1;33mtrue positive value should be meaningless, \033[0m" + "\033[1;33mthese users have been removed from GAUC calculation\033[0m" ) non_zero_idx *= (pos_len_list != 0) if any_without_neg: logger = getLogger() logger.warning( - "\033[1;31mNo negative samples in some users, \033[0m" - "\033[1;31mfalse positive value should be meaningless, \033[0m" - "\033[1;31mthese users have been removed from GAUC calculation\033[0m" + "\033[1;33mNo negative samples in some users, \033[0m" + "\033[1;33mfalse positive value should be meaningless, \033[0m" + "\033[1;33mthese users have been removed from GAUC calculation\033[0m" ) non_zero_idx *= (neg_len_list != 0) if any_without_pos or any_without_neg: @@ -224,22 +195,17 @@ def gauc_(user_len_list, pos_len_list, pos_rank_sum): def auc_(trues, preds): r"""AUC_ (also known as Area Under Curve) is used to evaluate the two-class model, referring to the area under the ROC curve - .. _AUC: https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve - Note: This metric does not calculate group-based AUC which considers the AUC scores averaged across users. It is also not limited to k. Instead, it calculates the scores on the entire prediction results regardless the users. - .. math:: \mathrm {AUC} = \frac{\sum\limits_{i=1}^M rank_{i} - \frac {{M} \times {(M+1)}}{2}} {{{M} \times {N}}} - :math:`M` is the number of positive samples. :math:`N` is the number of negative samples. :math:`rank_i` is the ascending rank of the ith positive sample. - """ fps, tps = _binary_clf_curve(trues, preds) @@ -253,14 +219,14 @@ def auc_(trues, preds): if fps[-1] <= 0: logger = getLogger() - logger.warning("\033[1;31mNo negative samples in y_true,\033[0m " "false positive value should be meaningless\033[0m") + logger.warning("\033[1;33mNo negative samples in y_true, \033[0m" "\033[1;33mfalse positive value should be meaningless\033[0m") fpr = np.repeat(np.nan, fps.shape) else: fpr = fps / fps[-1] if tps[-1] <= 0: logger = getLogger() - logger.warning("\033[1;31mNo positive samples in y_true,\033[0m " "\033[1;31mtrue positive value should be meaningless\033[0m") + logger.warning("\033[1;33mNo positive samples in y_true, \033[0m" "\033[1;33mtrue positive value should be meaningless\033[0m") tpr = np.repeat(np.nan, tps.shape) else: tpr = tps / tps[-1] @@ -273,45 +239,33 @@ def auc_(trues, preds): def mae_(trues, preds): r"""`Mean absolute error regression loss`__ - .. __: https://en.wikipedia.org/wiki/Mean_absolute_error - .. math:: \mathrm{MAE}=\frac{1}{|{T}|} \sum_{(u, i) \in {T}}\left|\hat{r}_{u i}-r_{u i}\right| - :math:`T` is the test set, :math:`\hat{r}_{u i}` is the score predicted by the model, and :math:`r_{u i}` the actual score of the test set. - """ return mean_absolute_error(trues, preds) def rmse_(trues, preds): r"""`Mean std error regression loss`__ - .. __: https://en.wikipedia.org/wiki/Root-mean-square_deviation - .. 
math:: \mathrm{RMSE} = \sqrt{\frac{1}{|{T}|} \sum_{(u, i) \in {T}}(\hat{r}_{u i}-r_{u i})^{2}} - :math:`T` is the test set, :math:`\hat{r}_{u i}` is the score predicted by the model, and :math:`r_{u i}` the actual score of the test set. - """ return np.sqrt(mean_squared_error(trues, preds)) def log_loss_(trues, preds): r"""`Log loss`__, aka logistic loss or cross-entropy loss - .. __: http://wiki.fast.ai/index.php/Log_Loss - .. math:: -\log {P(y_t|y_p)} = -(({y_t}\ \log{y_p}) + {(1-y_t)}\ \log{(1 - y_p)}) - For a single sample, :math:`y_t` is true label in :math:`\{0,1\}`. :math:`y_p` is the estimated probability that :math:`y_t = 1`. - """ eps = 1e-15 preds = np.float64(preds) @@ -325,7 +279,7 @@ def log_loss_(trues, preds): # TODO # def coverage_(): -# raise NotImplementedError +# raise NotImplemented # def gini_index_(): # raise NotImplementedError @@ -352,4 +306,4 @@ def log_loss_(trues, preds): 'logloss': log_loss_, 'auc': auc_, 'gauc': gauc_ -} +} \ No newline at end of file diff --git a/recbole/evaluator/proxy_evaluator.py b/recbole/evaluator/proxy_evaluator.py index a0df5246e..fa07f4d89 100644 --- a/recbole/evaluator/proxy_evaluator.py +++ b/recbole/evaluator/proxy_evaluator.py @@ -101,9 +101,9 @@ def _check_args(self): if isinstance(self.metrics, str): self.metrics = [self.metrics] else: - raise TypeError('metrics must be str or list') + raise TypeError('\033[1;31mmetrics must be str or list\033[0m') # Convert metric to lowercase for m in self.metrics: if m.lower() not in self.valid_metrics: - raise ValueError("There is no metric named {}!".format(m)) + raise ValueError("\033[1;31mThere is no metric named {}!\033[0m".format(m)) diff --git a/recbole/evaluator/utils.py b/recbole/evaluator/utils.py index 392873493..548f36ac7 100644 --- a/recbole/evaluator/utils.py +++ b/recbole/evaluator/utils.py @@ -66,7 +66,7 @@ def trunc(scores, method): try: cut_method = getattr(np, method) except NotImplementedError: - raise NotImplementedError("module 'numpy' has no function named '{}'".format(method)) + raise NotImplementedError("\033[1;31mmodule 'numpy' has no function named '{}'\033[0m".format(method)) scores = cut_method(scores) return scores diff --git a/recbole/model/abstract_recommender.py b/recbole/model/abstract_recommender.py index 95f02f6ba..026573055 100644 --- a/recbole/model/abstract_recommender.py +++ b/recbole/model/abstract_recommender.py @@ -326,7 +326,7 @@ def double_tower_embed_input_fields(self, interaction): """ if not self.double_tower: - raise RuntimeError('Please check your model hyper parameters and set \'double tower\' as True') + raise RuntimeError('\033[1;31mPlease check your model hyper parameters and set \'double tower\' as True\033[0m') sparse_embedding, dense_embedding = self.embed_input_fields(interaction) if dense_embedding is not None: first_dense_embedding, second_dense_embedding = \ diff --git a/recbole/model/context_aware_recommender/xdeepfm.py b/recbole/model/context_aware_recommender/xdeepfm.py index 74f95f395..527a5664b 100644 --- a/recbole/model/context_aware_recommender/xdeepfm.py +++ b/recbole/model/context_aware_recommender/xdeepfm.py @@ -49,8 +49,8 @@ def __init__(self, config, dataset): self.cin_layer_size = list(map(lambda x: int(x // 2 * 2), temp_cin_size)) if self.cin_layer_size[:-1] != temp_cin_size[:-1]: self.logger.warning( - '\033[1;31mLayer size of CIN should be even except for the last layer when direct is True.\033[0m' - '\033[1;31mIt is changed to {}\033[0m'.format(self.cin_layer_size) + '\033[1;33mLayer size of CIN should be even 
except for the last layer when direct is True.\033[0m'
+                '\033[1;33mIt is changed to {}\033[0m'.format(self.cin_layer_size)
             )

         # Create a convolutional layer for each CIN layer
diff --git a/recbole/model/general_recommender/cdae.py b/recbole/model/general_recommender/cdae.py
index 627b01abd..b67d43f0b 100644
--- a/recbole/model/general_recommender/cdae.py
+++ b/recbole/model/general_recommender/cdae.py
@@ -50,14 +50,14 @@ def __init__(self, config, dataset):
         elif self.hid_activation == 'tanh':
             self.h_act = nn.Tanh()
         else:
-            raise ValueError('Invalid hidden layer activation function')
+            raise ValueError('\033[1;31mInvalid hidden layer activation function\033[0m')

         if self.out_activation == 'sigmoid':
             self.o_act = nn.Sigmoid()
         elif self.out_activation == 'relu':
             self.o_act = nn.ReLU()
         else:
-            raise ValueError('Invalid output layer activation function')
+            raise ValueError('\033[1;31mInvalid output layer activation function\033[0m')

         self.dropout = nn.Dropout(p=self.corruption_ratio)

@@ -104,7 +104,7 @@ def calculate_loss(self, interaction):
         elif self.loss_type == 'BCE':
             loss_func = nn.BCELoss(reduction='sum')
         else:
-            raise ValueError('Invalid loss_type, loss_type must in [MSE, BCE]')
+            raise ValueError('\033[1;31mInvalid loss_type, loss_type must be in [MSE, BCE]\033[0m')

         loss = loss_func(predict, x_items)
         # l1-regularization
diff --git a/recbole/model/general_recommender/dmf.py b/recbole/model/general_recommender/dmf.py
index 7c0fbe57d..4c7db632d 100644
--- a/recbole/model/general_recommender/dmf.py
+++ b/recbole/model/general_recommender/dmf.py
@@ -65,7 +65,7 @@ def __init__(self, config, dataset):
             self.history_item_id, self.history_item_value, _ = dataset.history_item_matrix(value_field=self.RATING)
             self.interaction_matrix = dataset.inter_matrix(form='csr', value_field=self.RATING).astype(np.float32)
         else:
-            raise ValueError("The inter_matrix_type must in ['01', 'rating'] but get {}".format(self.inter_matrix_type))
+            raise ValueError("\033[1;31mThe inter_matrix_type must be in ['01', 'rating'] but got {}\033[0m".format(self.inter_matrix_type))
         self.max_rating = self.history_user_value.max()
         # tensor of shape [n_items, H] where H is max length of history interaction.
         self.history_user_id = self.history_user_id.to(self.device)
diff --git a/recbole/model/general_recommender/fism.py b/recbole/model/general_recommender/fism.py
index 02b165275..96a94fa63 100644
--- a/recbole/model/general_recommender/fism.py
+++ b/recbole/model/general_recommender/fism.py
@@ -49,9 +49,9 @@ def __init__(self, config, dataset):
         if self.split_to > 0:
             self.group = torch.chunk(torch.arange(self.n_items).to(self.device), self.split_to)
         else:
-            self.logger.warning('\033[1;31mPay Attetion!! the `split_to` is set to 0. If you catch a OMM error in this case,\033[0m ' + \
-                                '\033[1;31myou need to increase it \n\t\t\tuntil the error disappears. For example, \033[0m' + \
-                                '\033[1;31myou can append it in the command line such as `--split_to=5`\033[0m')
+            self.logger.warning('\033[1;33mPay Attetion!! the `split_to` is set to 0. If you catch a OMM error in this case,\033[0m ' + \
+                                '\033[1;33myou need to increase it \n\t\t\tuntil the error disappears. 
For example, \033[0m' + \ + '\033[1;33myou can append it in the command line such as `--split_to=5`\033[0m') # define layers and loss # construct source and destination item embedding matrix diff --git a/recbole/model/general_recommender/gcmc.py b/recbole/model/general_recommender/gcmc.py index 82fde721b..a7e3afc73 100644 --- a/recbole/model/general_recommender/gcmc.py +++ b/recbole/model/general_recommender/gcmc.py @@ -93,7 +93,7 @@ def __init__(self, config, dataset): div = self.gcn_output_dim // len(self.support) if self.gcn_output_dim % len(self.support) != 0: self.logger.warning( - "\033[1;31mHIDDEN[0] (=%d) of stack layer is adjusted to %d (in %d splits).\033[0m" % + "\033[1;33mHIDDEN[0] (=%d) of stack layer is adjusted to %d (in %d splits).\033[0m" % (self.gcn_output_dim, len(self.support) * div, len(self.support)) ) self.gcn_output_dim = len(self.support) * div diff --git a/recbole/model/general_recommender/nais.py b/recbole/model/general_recommender/nais.py index a9237db3a..8726fe14f 100644 --- a/recbole/model/general_recommender/nais.py +++ b/recbole/model/general_recommender/nais.py @@ -64,9 +64,9 @@ def __init__(self, config, dataset): self.logger.info('split the n_items to {} pieces'.format(self.split_to)) self.group = torch.chunk(torch.arange(self.n_items).to(self.device), self.split_to) else: - self.logger.warning('\033[1;31mPay Attetion!! the `split_to` is set to 0. If you catch a OMM error in this case,\033[0m] ' + \ - '\033[1;31myou need to increase it \n\t\t\tuntil the error disappears. For example,\033[0m ' + \ - '\033[1;31myou can append it in the command line such as `--split_to=5`\033[0m') + self.logger.warning('\033[1;33mPay Attetion!! the `split_to` is set to 0. If you catch a OMM error in this case,\033[0m] ' + \ + '\033[1;33myou need to increase it \n\t\t\tuntil the error disappears. 
For example,\033[0m ' + \
+                                '\033[1;33myou can append it in the command line such as `--split_to=5`\033[0m')

         # define layers and loss
         # construct source and destination item embedding matrix
@@ -78,7 +78,7 @@ def __init__(self, config, dataset):
         elif self.algorithm == 'prod':
             self.mlp_layers = MLPLayers([self.embedding_size, self.weight_size])
         else:
-            raise ValueError("NAIS just support attention type in ['concat', 'prod'] but get {}".format(self.algorithm))
+            raise ValueError("\033[1;31mNAIS only supports attention types in ['concat', 'prod'] but got {}\033[0m".format(self.algorithm))
         self.weight_layer = nn.Parameter(torch.ones(self.weight_size, 1))
         self.bceloss = nn.BCELoss()
diff --git a/recbole/model/general_recommender/neumf.py b/recbole/model/general_recommender/neumf.py
index 33d11d46f..a23673bdb 100644
--- a/recbole/model/general_recommender/neumf.py
+++ b/recbole/model/general_recommender/neumf.py
@@ -116,7 +116,7 @@ def forward(self, user, item):
         elif self.mlp_train:
             output = self.sigmoid(self.predict_layer(mlp_output))
         else:
-            raise RuntimeError('mf_train and mlp_train can not be False at the same time')
+            raise RuntimeError('\033[1;31mmf_train and mlp_train cannot both be False\033[0m')
         return output.squeeze()

     def calculate_loss(self, interaction):
diff --git a/recbole/model/general_recommender/nncf.py b/recbole/model/general_recommender/nncf.py
index 79692eada..e64af1dce 100644
--- a/recbole/model/general_recommender/nncf.py
+++ b/recbole/model/general_recommender/nncf.py
@@ -80,8 +80,8 @@ def __init__(self, config, dataset):
         elif self.neigh_info_method == "louvain":
             self.u_neigh, self.i_neigh = self.get_neigh_louvain()
         else:
-            raise RuntimeError('You need to choose the right algorithm of processing neighborhood information. \
-                The parameter neigh_info_method can be set to random, knn or louvain.')
+            raise RuntimeError('\033[1;31mYou need to choose the right algorithm of processing neighborhood information. 
\ + The parameter neigh_info_method can be set to random, knn or louvain.\033[0m') # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/layers.py b/recbole/model/layers.py index 3edb31568..c984de9e0 100644 --- a/recbole/model/layers.py +++ b/recbole/model/layers.py @@ -113,7 +113,7 @@ def activation_layer(activation_name='relu', emb_dim=None): elif issubclass(activation_name, nn.Module): activation = activation_name() else: - raise NotImplementedError("activation function {} is not implemented".format(activation_name)) + raise NotImplementedError("\033[1;31mactivation function {} is not implemented\033[0m".format(activation_name)) return activation @@ -352,8 +352,8 @@ def __init__(self, n_heads, hidden_size, hidden_dropout_prob, attn_dropout_prob, super(MultiHeadAttention, self).__init__() if hidden_size % n_heads != 0: raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (hidden_size, n_heads) + "\033[1;31mThe hidden size (%d) is not a multiple of the number of attention \033[0m" + "\033[1;31mheads (%d)\033[0m" % (hidden_size, n_heads) ) self.num_attention_heads = n_heads @@ -795,7 +795,7 @@ def __init__(self, dataset, embedding_size, pooling_mode, device): try: assert self.pooling_mode in ['mean', 'max', 'sum'] except AssertionError: - raise AssertionError("Make sure 'pooling_mode' in ['mean', 'max', 'sum']!") + raise AssertionError("\033[1;31mMake sure 'pooling_mode' in ['mean', 'max', 'sum']!\033[0m") self.get_fields_name_dim() self.get_embedding() @@ -820,7 +820,7 @@ def __init__(self, dataset, embedding_size, selected_features, pooling_mode, dev try: assert self.pooling_mode in ['mean', 'max', 'sum'] except AssertionError: - raise AssertionError("Make sure 'pooling_mode' in ['mean', 'max', 'sum']!") + raise AssertionError("\033[1;31mMake sure 'pooling_mode' in ['mean', 'max', 'sum']!\033[0m") self.get_fields_name_dim() self.get_embedding() @@ -866,7 +866,7 @@ def __init__(self, channels, kernels, strides, activation='relu', init_method=No self.num_of_nets = len(self.channels) - 1 if len(kernels) != len(strides) or self.num_of_nets != (len(kernels)): - raise RuntimeError('channels, kernels and strides don\'t match\n') + raise RuntimeError('\033[1;31mchannels, kernels and strides don\'t match\n\033[0m') cnn_modules = [] diff --git a/recbole/model/sequential_recommender/bert4rec.py b/recbole/model/sequential_recommender/bert4rec.py index 1a923b7db..6d52588ac 100644 --- a/recbole/model/sequential_recommender/bert4rec.py +++ b/recbole/model/sequential_recommender/bert4rec.py @@ -70,7 +70,7 @@ def __init__(self, config, dataset): try: assert self.loss_type in ['BPR', 'CE'] except AssertionError: - raise AssertionError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise AssertionError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # parameters initialization self.apply(self._init_weights) @@ -232,7 +232,7 @@ def calculate_loss(self, interaction): / torch.sum(targets) return loss else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") def predict(self, interaction): item_seq = interaction[self.ITEM_SEQ] diff --git a/recbole/model/sequential_recommender/caser.py b/recbole/model/sequential_recommender/caser.py index 229816deb..4d5117475 100644 --- a/recbole/model/sequential_recommender/caser.py +++ b/recbole/model/sequential_recommender/caser.py @@ -83,7 +83,7 @@ def __init__(self, config, 
dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/fdsa.py b/recbole/model/sequential_recommender/fdsa.py index 0fba79689..a4d21e51a 100644 --- a/recbole/model/sequential_recommender/fdsa.py +++ b/recbole/model/sequential_recommender/fdsa.py @@ -89,7 +89,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/fossil.py b/recbole/model/sequential_recommender/fossil.py index 0432174bf..e4fa67458 100644 --- a/recbole/model/sequential_recommender/fossil.py +++ b/recbole/model/sequential_recommender/fossil.py @@ -54,7 +54,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # init the parameters of the model self.apply(self.init_weights) diff --git a/recbole/model/sequential_recommender/gcsan.py b/recbole/model/sequential_recommender/gcsan.py index e64381473..ac77f9be7 100644 --- a/recbole/model/sequential_recommender/gcsan.py +++ b/recbole/model/sequential_recommender/gcsan.py @@ -139,7 +139,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/gru4rec.py b/recbole/model/sequential_recommender/gru4rec.py index 0ea93e331..1a6b9d23a 100644 --- a/recbole/model/sequential_recommender/gru4rec.py +++ b/recbole/model/sequential_recommender/gru4rec.py @@ -61,7 +61,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/gru4recf.py b/recbole/model/sequential_recommender/gru4recf.py index 55f8d2360..5585cf8d3 100644 --- a/recbole/model/sequential_recommender/gru4recf.py +++ b/recbole/model/sequential_recommender/gru4recf.py @@ -81,7 +81,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # parameters initialization self.apply(xavier_normal_initialization) diff --git a/recbole/model/sequential_recommender/gru4reckg.py b/recbole/model/sequential_recommender/gru4reckg.py index e8e68d5d8..66dfae29e 100644 --- a/recbole/model/sequential_recommender/gru4reckg.py +++ b/recbole/model/sequential_recommender/gru4reckg.py 
@@ -64,7 +64,7 @@ def __init__(self, config, dataset):
         elif self.loss_type == 'CE':
             self.loss_fct = nn.CrossEntropyLoss()
         else:
-            raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!")
+            raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m")

         # parameters initialization
         self.apply(xavier_normal_initialization)
diff --git a/recbole/model/sequential_recommender/hgn.py b/recbole/model/sequential_recommender/hgn.py
index dc5f6ec49..13c70d071 100644
--- a/recbole/model/sequential_recommender/hgn.py
+++ b/recbole/model/sequential_recommender/hgn.py
@@ -41,7 +41,7 @@ def __init__(self, config, dataset):
         self.pool_type = config["pooling_type"]
         if self.pool_type not in ["max", "average"]:
-            raise NotImplementedError("Make sure 'loss_type' in ['max', 'average']!")
+            raise NotImplementedError("\033[1;31mMake sure 'pooling_type' in ['max', 'average']!\033[0m")

         # define the layers and loss function
         self.item_embedding = nn.Embedding(self.n_items, self.embedding_size, padding_idx=0)
@@ -67,7 +67,7 @@ def __init__(self, config, dataset):
         elif self.loss_type == 'CE':
             self.loss_fct = nn.CrossEntropyLoss()
         else:
-            raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!")
+            raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m")

         # init the parameters of the model
         self.apply(self._init_weights)
diff --git a/recbole/model/sequential_recommender/hrm.py b/recbole/model/sequential_recommender/hrm.py
index 421835266..f65de9613 100644
--- a/recbole/model/sequential_recommender/hrm.py
+++ b/recbole/model/sequential_recommender/hrm.py
@@ -59,7 +59,7 @@ def __init__(self, config, dataset):
         elif self.loss_type == 'CE':
             self.loss_fct = nn.CrossEntropyLoss()
         else:
-            raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!")
+            raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m")

         # init the parameters of the model
         self.apply(self._init_weights)
diff --git a/recbole/model/sequential_recommender/ksr.py b/recbole/model/sequential_recommender/ksr.py
index bf48c6719..1640edd17 100644
--- a/recbole/model/sequential_recommender/ksr.py
+++ b/recbole/model/sequential_recommender/ksr.py
@@ -71,7 +71,7 @@ def __init__(self, config, dataset):
         elif self.loss_type == 'CE':
             self.loss_fct = nn.CrossEntropyLoss()
         else:
-            raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!")
+            raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m")

         # parameters initialization
         self.apply(self._init_weights)
diff --git a/recbole/model/sequential_recommender/narm.py b/recbole/model/sequential_recommender/narm.py
index 76f5594b7..44f7be530 100644
--- a/recbole/model/sequential_recommender/narm.py
+++ b/recbole/model/sequential_recommender/narm.py
@@ -59,7 +59,7 @@ def __init__(self, config, dataset):
         elif self.loss_type == 'CE':
             self.loss_fct = nn.CrossEntropyLoss()
         else:
-            raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!")
+            raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m")

         # parameters initialization
         self.apply(self._init_weights)
diff --git a/recbole/model/sequential_recommender/nextitnet.py b/recbole/model/sequential_recommender/nextitnet.py
index e0ad3def4..eb1179351 100644
--- a/recbole/model/sequential_recommender/nextitnet.py
+++ b/recbole/model/sequential_recommender/nextitnet.py
@@ -69,7 +69,7 @@ def __init__(self, config, dataset):
         elif self.loss_type == 'CE':
             self.loss_fct = nn.CrossEntropyLoss()
         else:
-            raise 
NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") self.reg_loss = RegLoss() # parameters initialization diff --git a/recbole/model/sequential_recommender/npe.py b/recbole/model/sequential_recommender/npe.py index 29674241a..d9cd4bd78 100644 --- a/recbole/model/sequential_recommender/npe.py +++ b/recbole/model/sequential_recommender/npe.py @@ -55,7 +55,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # init the parameters of the module self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/s3rec.py b/recbole/model/sequential_recommender/s3rec.py index 1aa86184f..9334fd5c1 100644 --- a/recbole/model/sequential_recommender/s3rec.py +++ b/recbole/model/sequential_recommender/s3rec.py @@ -103,7 +103,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE' and self.train_stage == 'finetune': self.loss_fct = nn.CrossEntropyLoss() elif self.train_stage == 'finetune': - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # parameters initialization assert self.train_stage in ['pretrain', 'finetune'] diff --git a/recbole/model/sequential_recommender/sasrec.py b/recbole/model/sequential_recommender/sasrec.py index ea58a8fdf..920b90b47 100644 --- a/recbole/model/sequential_recommender/sasrec.py +++ b/recbole/model/sequential_recommender/sasrec.py @@ -71,7 +71,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/sasrecf.py b/recbole/model/sequential_recommender/sasrecf.py index b6c4feb4f..bac276f5e 100644 --- a/recbole/model/sequential_recommender/sasrecf.py +++ b/recbole/model/sequential_recommender/sasrecf.py @@ -70,7 +70,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/shan.py b/recbole/model/sequential_recommender/shan.py index 19d7d435d..7599d6773 100644 --- a/recbole/model/sequential_recommender/shan.py +++ b/recbole/model/sequential_recommender/shan.py @@ -74,7 +74,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # init the parameter of the model self.apply(self.init_weights) diff --git a/recbole/model/sequential_recommender/srgnn.py b/recbole/model/sequential_recommender/srgnn.py index 0147f1499..9f275969f 100644 --- a/recbole/model/sequential_recommender/srgnn.py +++ b/recbole/model/sequential_recommender/srgnn.py @@ -135,7 +135,7 @@ def __init__(self, config, 
dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # parameters initialization self._reset_parameters() diff --git a/recbole/model/sequential_recommender/stamp.py b/recbole/model/sequential_recommender/stamp.py index f9982734e..b63e568ae 100644 --- a/recbole/model/sequential_recommender/stamp.py +++ b/recbole/model/sequential_recommender/stamp.py @@ -60,7 +60,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") + raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") # # parameters initialization self.apply(self._init_weights) diff --git a/recbole/quick_start/quick_start.py b/recbole/quick_start/quick_start.py index 7062e584f..6f5bd3240 100644 --- a/recbole/quick_start/quick_start.py +++ b/recbole/quick_start/quick_start.py @@ -34,6 +34,7 @@ def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=Non logger = getLogger() logger.info(config) + logger.debug('hhhh') # dataset filtering dataset = create_dataset(config) diff --git a/recbole/sampler/sampler.py b/recbole/sampler/sampler.py index e0e1a0d9b..58b30c100 100644 --- a/recbole/sampler/sampler.py +++ b/recbole/sampler/sampler.py @@ -61,14 +61,14 @@ def get_random_list(self): Returns: numpy.ndarray or list: Random list of value_id. """ - raise NotImplementedError('method [get_random_list] should be implemented') + raise NotImplementedError('\033[1;31mmethod [get_random_list] should be implemented\033[0m') def get_used_ids(self): """ Returns: numpy.ndarray: Used ids. Index is key_id, and element is a set of value_ids. """ - raise NotImplementedError('method [get_used_ids] should be implemented') + raise NotImplementedError('\033[1;31mmethod [get_used_ids] should be implemented\033[0m') def random(self): """ @@ -175,7 +175,7 @@ def __init__(self, phases, datasets, distribution='uniform'): if not isinstance(datasets, list): datasets = [datasets] if len(phases) != len(datasets): - raise ValueError(f'Phases {phases} and datasets {datasets} should have the same length.') + raise ValueError(f'\033[1;31mPhases {phases} and datasets {datasets} should have the same length.\033[0m') self.phases = phases self.datasets = datasets @@ -201,7 +201,7 @@ def get_random_list(self): random_item_list.extend(dataset.inter_feat[self.iid_field].numpy()) return random_item_list else: - raise NotImplementedError(f'Distribution [{self.distribution}] has not been implemented.') + raise NotImplementedError(f'\033[1;31mDistribution [{self.distribution}] has not been implemented.\033[0m') def get_used_ids(self): """ @@ -220,9 +220,9 @@ def get_used_ids(self): for used_item_set in used_item_id[self.phases[-1]]: if len(used_item_set) + 1 == self.n_items: # [pad] is a item. raise ValueError( - 'Some users have interacted with all items, ' - 'which we can not sample negative items for them. ' - 'Please set `max_user_inter_num` to filter those users.' + '\033[1;31mSome users have interacted with all items, \033[0m' + '\033[1;31mwhich we can not sample negative items for them. \033[0m' + '\033[1;31mPlease set `max_user_inter_num` to filter those users.\033[0m' ) return used_item_id @@ -237,7 +237,7 @@ def set_phase(self, phase): is set to the value of corresponding phase. 
""" if phase not in self.phases: - raise ValueError(f'Phase [{phase}] not exist.') + raise ValueError(f'\033[1;31mPhase [{phase}] not exist.\033[0m') new_sampler = copy.copy(self) new_sampler.phase = phase new_sampler.used_ids = new_sampler.used_ids[phase] @@ -262,7 +262,7 @@ def sample_by_user_ids(self, user_ids, num): except IndexError: for user_id in user_ids: if user_id < 0 or user_id >= self.n_users: - raise ValueError(f'user_id [{user_id}] not exist.') + raise ValueError(f'\033[1;31muser_id [{user_id}] not exist.\033[0m') class KGSampler(AbstractSampler): @@ -296,7 +296,7 @@ def get_random_list(self): elif self.distribution == 'popularity': return list(self.hid_list) + list(self.tid_list) else: - raise NotImplementedError(f'Distribution [{self.distribution}] has not been implemented.') + raise NotImplementedError(f'\033[1;31mDistribution [{self.distribution}] has not been implemented.\033[0m') def get_used_ids(self): """ @@ -311,8 +311,8 @@ def get_used_ids(self): for used_tail_set in used_tail_entity_id: if len(used_tail_set) + 1 == self.entity_num: # [pad] is a entity. raise ValueError( - 'Some head entities have relation with all entities, ' - 'which we can not sample negative entities for them.' + '\033[1;31mSome head entities have relation with all entities, \033[0m' + '\033[1;31mwhich we can not sample negative entities for them.\033[0m' ) return used_tail_entity_id @@ -335,7 +335,7 @@ def sample_by_entity_ids(self, head_entity_ids, num=1): except IndexError: for head_entity_id in head_entity_ids: if head_entity_id not in self.head_entities: - raise ValueError(f'head_entity_id [{head_entity_id}] not exist.') + raise ValueError(f'\033[1;31mhead_entity_id [{head_entity_id}] not exist.\033[0m') class RepeatableSampler(AbstractSampler): @@ -373,7 +373,7 @@ def get_random_list(self): elif self.distribution == 'popularity': return self.dataset.inter_feat[self.iid_field].numpy() else: - raise NotImplementedError(f'Distribution [{self.distribution}] has not been implemented.') + raise NotImplementedError(f'\033[1;31mDistribution [{self.distribution}] has not been implemented.\033[0m') def get_used_ids(self): """ @@ -402,7 +402,7 @@ def sample_by_user_ids(self, user_ids, num): except IndexError: for user_id in user_ids: if user_id < 0 or user_id >= self.n_users: - raise ValueError(f'user_id [{user_id}] not exist.') + raise ValueError(f'\033[1;31muser_id [{user_id}] not exist.\033[0m') def set_phase(self, phase): """Get the sampler of corresponding phase. @@ -414,7 +414,7 @@ def set_phase(self, phase): Sampler: the copy of this sampler, and :attr:`phase` is set the same as input phase. 
""" if phase not in self.phases: - raise ValueError(f'Phase [{phase}] not exist.') + raise ValueError(f'\033[1;31mPhase [{phase}] not exist.\033[0m') new_sampler = copy.copy(self) new_sampler.phase = phase return new_sampler diff --git a/recbole/trainer/hyper_tuning.py b/recbole/trainer/hyper_tuning.py index dda0c49e5..017cbf243 100644 --- a/recbole/trainer/hyper_tuning.py +++ b/recbole/trainer/hyper_tuning.py @@ -76,8 +76,8 @@ def _validate_space_exhaustive_search(space): if node.name in implicit_stochastic_symbols: if node.name not in supported_stochastic_symbols: raise ExhaustiveSearchError( - 'Exhaustive search is only possible with the following stochastic symbols: ' - '' + ', '.join(supported_stochastic_symbols) + '\033[1;31mExhaustive search is only possible with the following stochastic symbols: ' + '\033[1;31m' + ', '.join(supported_stochastic_symbols) ) @@ -163,13 +163,13 @@ def __init__( elif params_dict: self.space = self._build_space_from_dict(params_dict) else: - raise ValueError('at least one of `space`, `params_file` and `params_dict` is provided') + raise ValueError('\033[1;31mat least one of `space`, `params_file` and `params_dict` is provided\033[0m') if isinstance(algo, str): if algo == 'exhaustive': self.algo = partial(exhaustive_search, nbMaxSucessiveFailures=1000) self.max_evals = _spacesize(self.space) else: - raise ValueError('Illegal algo [{}]'.format(algo)) + raise ValueError('\033[1;31mIllegal algo [{}]\033[0m'.format(algo)) else: self.algo = algo @@ -196,7 +196,7 @@ def _build_space_from_file(file): low, high = para_value.strip().split(',') space[para_name] = hp.loguniform(para_name, float(low), float(high)) else: - raise ValueError('Illegal param type [{}]'.format(para_type)) + raise ValueError('\033[1;31mIllegal param type [{}]\033[0m'.format(para_type)) return space @staticmethod @@ -228,7 +228,7 @@ def _build_space_from_dict(config_dict): high = para_value[1] space[para_name] = hp.loguniform(para_name, float(low), float(high)) else: - raise ValueError('Illegal param type [{}]'.format(para_type)) + raise ValueError('\033[1;31mIllegal param type [{}]\033[0m'.format(para_type)) return space @staticmethod diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py index 256832cf2..655601d6d 100644 --- a/recbole/trainer/trainer.py +++ b/recbole/trainer/trainer.py @@ -42,14 +42,14 @@ def fit(self, train_data): r"""Train the model based on the train data. """ - raise NotImplementedError('Method [next] should be implemented.') + raise NotImplementedError('\033[1;31mMethod [next] should be implemented.\033[0m') def evaluate(self, eval_data): r"""Evaluate the model based on the eval data. 
""" - raise NotImplementedError('Method [next] should be implemented.') + raise NotImplementedError('\033[1;31mMethod [next] should be implemented.\033[0m') class Trainer(AbstractTrainer): @@ -116,9 +116,9 @@ def _build_optimizer(self, params): elif self.learner.lower() == 'sparse_adam': optimizer = optim.SparseAdam(params, lr=self.learning_rate) if self.weight_decay > 0: - self.logger.warning('\033[1;31mSparse Adam cannot argument received argument [{weight_decay}]\033[0m') + self.logger.warning('\033[1;33mSparse Adam cannot argument received argument [{weight_decay}]\033[0m') else: - self.logger.warning('\033[1;31mReceived unrecognized optimizer, set default Adam optimizer\033[0m') + self.logger.warning('\033[1;33mReceived unrecognized optimizer, set default Adam optimizer\033[0m') optimizer = optim.Adam(params, lr=self.learning_rate) return optimizer @@ -213,8 +213,8 @@ def resume_checkpoint(self, resume_file): # load architecture params from checkpoint if checkpoint['config']['model'].lower() != self.config['model'].lower(): self.logger.warning( - '\033[1;31mArchitecture configuration given in config file is different from that of checkpoint.\33[0m ' - '\033[1;31mThis may yield an exception while state_dict is being loaded.\033[0m' + '\033[1;33mArchitecture configuration given in config file is different from that of checkpoint.\33[0m ' + '\033[1;33mThis may yield an exception while state_dict is being loaded.\033[0m' ) self.model.load_state_dict(checkpoint['state_dict']) @@ -225,7 +225,7 @@ def resume_checkpoint(self, resume_file): def _check_nan(self, loss): if torch.isnan(loss): - raise ValueError('Training loss is nan') + raise ValueError('\033[1;31mTraining loss is nan\033[0m') def _generate_train_loss_output(self, epoch_idx, s_time, e_time, losses): des = self.config['loss_decimal_place'] or 4 @@ -553,7 +553,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre elif self.model.train_stage == 'finetune': return super().fit(train_data, valid_data, verbose, saved, show_progress, callback_fn) else: - raise ValueError("Please make sure that the 'train_stage' is 'pretrain' or 'finetune' ") + raise ValueError("\033[1;31mPlease make sure that the 'train_stage' is 'pretrain' or 'finetune' \033[0m") class MKRTrainer(Trainer): diff --git a/recbole/utils/utils.py b/recbole/utils/utils.py index 9e2012372..a6c29eef8 100644 --- a/recbole/utils/utils.py +++ b/recbole/utils/utils.py @@ -65,7 +65,7 @@ def get_model(model_name): break if model_module is None: - raise ValueError('`model_name` [{}] is not the name of an existing model.'.format(model_name)) + raise ValueError('\033[1;31m`model_name` [{}] is not the name of an existing model.\033[0m'.format(model_name)) model_class = getattr(model_module, model_name) return model_class From 4ef5d7c74d1716fd0d892165dcf839d1f07f2792 Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Sun, 7 Mar 2021 22:00:20 +0800 Subject: [PATCH 04/16] change color --- recbole/quick_start/quick_start.py | 1 - 1 file changed, 1 deletion(-) diff --git a/recbole/quick_start/quick_start.py b/recbole/quick_start/quick_start.py index 6f5bd3240..7062e584f 100644 --- a/recbole/quick_start/quick_start.py +++ b/recbole/quick_start/quick_start.py @@ -34,7 +34,6 @@ def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=Non logger = getLogger() logger.info(config) - logger.debug('hhhh') # dataset filtering dataset = create_dataset(config) From dfc8322ed731c2c5947a9f21a9a3fc0df30a713f Mon Sep 17 00:00:00 2001 From: 
Guan-JW <15692276873@163.com> Date: Sun, 7 Mar 2021 22:49:23 +0800 Subject: [PATCH 05/16] change color --- recbole/config/configurator.py | 2 +- .../data/dataloader/abstract_dataloader.py | 2 +- .../data/dataloader/knowledge_dataloader.py | 2 +- recbole/data/dataloader/user_dataloader.py | 2 +- recbole/data/dataset/dataset.py | 26 +++++++++---------- recbole/data/dataset/sequential_dataset.py | 4 +-- recbole/evaluator/metrics.py | 18 ++++++------- .../context_aware_recommender/xdeepfm.py | 4 +-- recbole/model/general_recommender/fism.py | 6 ++--- recbole/model/general_recommender/gcmc.py | 2 +- recbole/model/general_recommender/nais.py | 6 ++--- recbole/trainer/trainer.py | 8 +++--- recbole/utils/logger.py | 13 +++++++--- 13 files changed, 51 insertions(+), 44 deletions(-) diff --git a/recbole/config/configurator.py b/recbole/config/configurator.py index 49f68eb58..dd444c31a 100644 --- a/recbole/config/configurator.py +++ b/recbole/config/configurator.py @@ -159,7 +159,7 @@ def _load_cmd_line(self): cmd_config_dict[cmd_arg_name] = cmd_arg_value if len(unrecognized_args) > 0: logger = getLogger() - logger.warning('\033[1;33mcommand line args [{}] will not be used in RecBole\033[0m'.format(' '.join(unrecognized_args))) + logger.warning('command line args [{}] will not be used in RecBole'.format(' '.join(unrecognized_args))) cmd_config_dict = self._convert_config_dict(cmd_config_dict) return cmd_config_dict diff --git a/recbole/data/dataloader/abstract_dataloader.py b/recbole/data/dataloader/abstract_dataloader.py index 2a28850f2..4c980252a 100644 --- a/recbole/data/dataloader/abstract_dataloader.py +++ b/recbole/data/dataloader/abstract_dataloader.py @@ -121,7 +121,7 @@ def set_batch_size(self, batch_size): raise PermissionError('\033[1;31mCannot change dataloader\'s batch_size while iteration\033[0m') if self.batch_size != batch_size: self.batch_size = batch_size - self.logger.warning(f'\033[1;33mBatch size is changed to {batch_size}\033[0m.') + self.logger.warning(f'Batch size is changed to {batch_size}.') def upgrade_batch_size(self, batch_size): """Upgrade the batch_size of the dataloader, if input batch_size is bigger than current batch_size. 
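[Note] The hunks above strip the inline ANSI escape sequences that earlier patches embedded in individual messages; later in this patch, recbole/utils/logger.py switches to a colorlog.ColoredFormatter so color is applied per log level in one place. For reference, here is a minimal standalone sketch of the two approaches; the logger names and messages are illustrative assumptions, not RecBole API:

```python
# Minimal sketch, assuming the third-party `colorlog` package is installed
# (this series adds it to requirements.txt). Names are illustrative only.
import logging

import colorlog

# Style 1: inline ANSI SGR escapes, as removed by the hunks above.
# '\033[' opens the sequence, '1;33' selects bold yellow ('1;31' bold red,
# '1;34' bold blue, '1;35' bold magenta), 'm' closes it, and '\033[0m'
# resets the terminal attributes afterwards.
logging.basicConfig(level=logging.INFO)
logging.getLogger('ansi_demo').warning('\033[1;33minline yellow warning\033[0m')

# Style 2: color entire records by level with a formatter, as the
# recbole/utils/logger.py hunk later in this patch does; the message
# strings themselves stay plain.
handler = logging.StreamHandler()
handler.setFormatter(colorlog.ColoredFormatter(
    '%(log_color)s%(asctime)-15s %(levelname)s %(message)s',
    '%d %b %H:%M',
    log_colors={'DEBUG': 'cyan', 'WARNING': 'yellow', 'ERROR': 'red', 'CRITICAL': 'red'},
))
logger = logging.getLogger('colorlog_demo')
logger.addHandler(handler)
logger.propagate = False  # keep the root handler from printing a second, uncolored copy
logger.warning('warning colored by the formatter')
```

Because only the console formatter injects color, the file handler's output stays free of escape codes, which is why these hunks can drop the per-message escapes without losing colored console output.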
diff --git a/recbole/data/dataloader/knowledge_dataloader.py b/recbole/data/dataloader/knowledge_dataloader.py index dcb0d2d06..bfa5f80bb 100644 --- a/recbole/data/dataloader/knowledge_dataloader.py +++ b/recbole/data/dataloader/knowledge_dataloader.py @@ -55,7 +55,7 @@ def setup(self): """ if self.shuffle is False: self.shuffle = True - self.logger.warning('\033[1;33mkg based dataloader must shuffle the data\033[0m') + self.logger.warning('kg based dataloader must shuffle the data') @property def pr_end(self): diff --git a/recbole/data/dataloader/user_dataloader.py b/recbole/data/dataloader/user_dataloader.py index 8143af21d..2d2fd62a0 100644 --- a/recbole/data/dataloader/user_dataloader.py +++ b/recbole/data/dataloader/user_dataloader.py @@ -47,7 +47,7 @@ def setup(self): """ if self.shuffle is False: self.shuffle = True - self.logger.warning('\033[1;33mUserDataLoader must shuffle the data\033[0m') + self.logger.warning('UserDataLoader must shuffle the data') @property def pr_end(self): diff --git a/recbole/data/dataset/dataset.py b/recbole/data/dataset/dataset.py index 43756eeda..efa5a6abf 100644 --- a/recbole/data/dataset/dataset.py +++ b/recbole/data/dataset/dataset.py @@ -420,7 +420,7 @@ def _load_feat(self, filepath, source): dtype[field_type] = np.float64 if ftype == FeatureType.FLOAT else str if len(columns) == 0: - self.logger.warning(f'\033[1;33mNo columns has been loaded from [{source}]\033[0m') + self.logger.warning(f'No columns has been loaded from [{source}]') return None df = pd.read_csv(filepath, delimiter=self.config['field_separator'], usecols=usecols, dtype=dtype) @@ -505,8 +505,8 @@ def _preload_weight_matrix(self): matrix[pid] = prow[:max_len] else: self.logger.warning( - f'\033[1;33mField [{preload_value_field}] with type [{value_ftype}] is not `float` or `float_seq`, \033[0m' - f'\033[1;33mwhich will not be handled by preload matrix.\033[0m' + f'Field [{preload_value_field}] with type [{value_ftype}] is not `float` or `float_seq`, \033[0m' + f'which will not be handled by preload matrix.' 
) continue self._preloaded_weight[preload_id_field] = matrix @@ -554,7 +554,7 @@ def _normalize(self): if field not in self.field2type: raise ValueError(f'\033[1;31mField [{field}] does not exist.\033[0m') elif ftype != FeatureType.FLOAT and ftype != FeatureType.FLOAT_SEQ: - self.logger.warning(f'\033[1;33m{field} is not a FLOAT/FLOAT_SEQ feat, which will not be normalized.\033[0m') + self.logger.warning(f'{field} is not a FLOAT/FLOAT_SEQ feat, which will not be normalized.') elif self.config['normalize_all']: fields = self.float_like_fields else: @@ -572,7 +572,7 @@ def _normalize(self): lst = feat[field].values mx, mn = max(lst), min(lst) if mx == mn: - self.logger.warning(f'\033[1;33mAll the same value in [{field}] from [{feat}_feat].\033[0m') + self.logger.warning(f'All the same value in [{field}] from [{feat}_feat].') feat[field] = 1.0 else: feat[field] = (lst - mn) / (mx - mn) @@ -581,7 +581,7 @@ def _normalize(self): lst = feat[field].agg(np.concatenate) mx, mn = max(lst), min(lst) if mx == mn: - self.logger.warning(f'\033[1;33mAll the same value in [{field}] from [{feat}_feat].\033[0m') + self.logger.warning(f'All the same value in [{field}] from [{feat}_feat].') lst = 1.0 else: lst = (lst - mn) / (mx - mn) @@ -597,14 +597,14 @@ def _filter_nan_user_or_item(self): dropped_feat = feat.index[feat[field].isnull()] if len(dropped_feat): self.logger.warning( - f'\033[1;33mIn {name}_feat, line {list(dropped_feat + 2)}, {field} do not exist, so they will be removed.\033[0m' + f'In {name}_feat, line {list(dropped_feat + 2)}, {field} do not exist, so they will be removed.' ) feat.drop(feat.index[dropped_feat], inplace=True) if field is not None: dropped_inter = self.inter_feat.index[self.inter_feat[field].isnull()] if len(dropped_inter): self.logger.warning( - f'\033[1;33mIn inter_feat, line {list(dropped_inter + 2)}, {field} do not exist, so they will be removed.\033[0m' + f'In inter_feat, line {list(dropped_inter + 2)}, {field} do not exist, so they will be removed.' ) self.inter_feat.drop(self.inter_feat.index[dropped_inter], inplace=True) @@ -629,8 +629,8 @@ def _remove_duplication(self): ) else: self.logger.warning( - f'\033[1;33mTimestamp field has not been loaded or specified, \033[0m' - f'\033[1;33mthus strategy [{keep}] of duplication removal may be meaningless.\033[0m' + f'Timestamp field has not been loaded or specified, ' + f'thus strategy [{keep}] of duplication removal may be meaningless.' ) self.inter_feat.drop_duplicates(subset=[self.uid_field, self.iid_field], keep=keep, inplace=True) @@ -1247,7 +1247,7 @@ def _drop_unused_col(self): for field in unused_fields: if field not in feat: self.logger.warning( - f'\033[1;33mField [{field}] is not in [{feat_name}_feat], which can not be set in `unused_col`.\033[0m' + f'Field [{field}] is not in [{feat_name}_feat], which can not be set in `unused_col`.' ) continue self._del_col(feat, field) @@ -1615,8 +1615,8 @@ def _history_matrix(self, row, value_field=None): col_num = np.max(history_len) if col_num > max_col_num * 0.2: self.logger.warning( - f'\033[1;33mMax value of {row}\'s history interaction records has reached \033[0m' - f'\033[1;33m{col_num / max_col_num * 100}% of the total.\033[0m' + f'Max value of {row}\'s history interaction records has reached ' + f'{col_num / max_col_num * 100}% of the total.' 
) history_matrix = np.zeros((row_num, col_num), dtype=np.int64) diff --git a/recbole/data/dataset/sequential_dataset.py b/recbole/data/dataset/sequential_dataset.py index 43cc175c9..428ea3886 100644 --- a/recbole/data/dataset/sequential_dataset.py +++ b/recbole/data/dataset/sequential_dataset.py @@ -123,8 +123,8 @@ def inter_matrix(self, form='coo', value_field=None): if not self.uid_field or not self.iid_field: raise ValueError('\033[1;31mdataset does not exist uid/iid, thus can not converted to sparse matrix.\033[0m') - self.logger.warning('\033[1;33mLoad interaction matrix may lead to label leakage from testing phase, this implementation \033[0m' - '\033[1;33monly provides the interactions corresponding to specific phase\033[0m') + self.logger.warning('Load interaction matrix may lead to label leakage from testing phase, this implementation ' + 'monly provides the interactions corresponding to specific phase') local_inter_feat = self.inter_feat[self.uid_list] return self._create_sparse_matrix(local_inter_feat, self.uid_field, self.iid_field, form, value_field) diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index 4c18798de..eada10cb5 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -166,17 +166,17 @@ def gauc_(user_len_list, pos_len_list, pos_rank_sum): if any_without_pos: logger = getLogger() logger.warning( - "\033[1;33mNo positive samples in some users, \033[0m" - "\033[1;33mtrue positive value should be meaningless, \033[0m" - "\033[1;33mthese users have been removed from GAUC calculation\033[0m" + "No positive samples in some users, " + "true positive value should be meaningless, " + "these users have been removed from GAUC calculation" ) non_zero_idx *= (pos_len_list != 0) if any_without_neg: logger = getLogger() logger.warning( - "\033[1;33mNo negative samples in some users, \033[0m" - "\033[1;33mfalse positive value should be meaningless, \033[0m" - "\033[1;33mthese users have been removed from GAUC calculation\033[0m" + "No negative samples in some users, " + "false positive value should be meaningless, " + "these users have been removed from GAUC calculation" ) non_zero_idx *= (neg_len_list != 0) if any_without_pos or any_without_neg: @@ -219,14 +219,14 @@ def auc_(trues, preds): if fps[-1] <= 0: logger = getLogger() - logger.warning("\033[1;33mNo negative samples in y_true, \033[0m" "\033[1;33mfalse positive value should be meaningless\033[0m") + logger.warning("No negative samples in y_true, " "false positive value should be meaningless") fpr = np.repeat(np.nan, fps.shape) else: fpr = fps / fps[-1] if tps[-1] <= 0: logger = getLogger() - logger.warning("\033[1;33mNo positive samples in y_true, \033[0m" "\033[1;33mtrue positive value should be meaningless\033[0m") + logger.warning("No positive samples in y_true, " "true positive value should be meaningless") tpr = np.repeat(np.nan, tps.shape) else: tpr = tps / tps[-1] @@ -279,7 +279,7 @@ def log_loss_(trues, preds): # TODO # def coverage_(): -# raise NotImplemented +# raise NotImplementedError # def gini_index_(): # raise NotImplementedError diff --git a/recbole/model/context_aware_recommender/xdeepfm.py b/recbole/model/context_aware_recommender/xdeepfm.py index 527a5664b..0af6ef770 100644 --- a/recbole/model/context_aware_recommender/xdeepfm.py +++ b/recbole/model/context_aware_recommender/xdeepfm.py @@ -49,8 +49,8 @@ def __init__(self, config, dataset): self.cin_layer_size = list(map(lambda x: int(x // 2 * 2), temp_cin_size)) if self.cin_layer_size[:-1] != 
temp_cin_size[:-1]:
             self.logger.warning(
-                '\033[1;33mLayer size of CIN should be even except for the last layer when direct is True.\033[0m'
-                '\033[1;33mIt is changed to {}\033[0m'.format(self.cin_layer_size)
+                'Layer size of CIN should be even except for the last layer when direct is True. '
+                'It is changed to {}'.format(self.cin_layer_size)
             )

         # Create a convolutional layer for each CIN layer
diff --git a/recbole/model/general_recommender/fism.py b/recbole/model/general_recommender/fism.py
index 96a94fa63..fdfecc216 100644
--- a/recbole/model/general_recommender/fism.py
+++ b/recbole/model/general_recommender/fism.py
@@ -49,9 +49,9 @@ def __init__(self, config, dataset):
         if self.split_to > 0:
             self.group = torch.chunk(torch.arange(self.n_items).to(self.device), self.split_to)
         else:
-            self.logger.warning('\033[1;33mPay Attetion!! the `split_to` is set to 0. If you catch a OMM error in this case,\033[0m ' + \
-                                '\033[1;33myou need to increase it \n\t\t\tuntil the error disappears. For example, \033[0m' + \
-                                '\033[1;33myou can append it in the command line such as `--split_to=5`\033[0m')
+            self.logger.warning('Pay Attention!! the `split_to` is set to 0. If you catch an OOM error in this case, ' + \
+                                'you need to increase it \n\t\t\tuntil the error disappears. For example, ' + \
+                                'you can append it in the command line such as `--split_to=5`')

         # define layers and loss
         # construct source and destination item embedding matrix
diff --git a/recbole/model/general_recommender/gcmc.py b/recbole/model/general_recommender/gcmc.py
index a7e3afc73..e3715493d 100644
--- a/recbole/model/general_recommender/gcmc.py
+++ b/recbole/model/general_recommender/gcmc.py
@@ -93,7 +93,7 @@ def __init__(self, config, dataset):
         div = self.gcn_output_dim // len(self.support)
         if self.gcn_output_dim % len(self.support) != 0:
             self.logger.warning(
-                "\033[1;33mHIDDEN[0] (=%d) of stack layer is adjusted to %d (in %d splits).\033[0m" %
+                "HIDDEN[0] (=%d) of stack layer is adjusted to %d (in %d splits)." %
                 (self.gcn_output_dim, len(self.support) * div, len(self.support))
             )
             self.gcn_output_dim = len(self.support) * div
diff --git a/recbole/model/general_recommender/nais.py b/recbole/model/general_recommender/nais.py
index 8726fe14f..c7e669b8e 100644
--- a/recbole/model/general_recommender/nais.py
+++ b/recbole/model/general_recommender/nais.py
@@ -64,9 +64,9 @@ def __init__(self, config, dataset):
             self.logger.info('split the n_items to {} pieces'.format(self.split_to))
             self.group = torch.chunk(torch.arange(self.n_items).to(self.device), self.split_to)
         else:
-            self.logger.warning('\033[1;33mPay Attetion!! the `split_to` is set to 0. If you catch a OMM error in this case,\033[0m] ' + \
-                                '\033[1;33myou need to increase it \n\t\t\tuntil the error disappears. For example,\033[0m ' + \
-                                '\033[1;33myou can append it in the command line such as `--split_to=5`\033[0m')
+            self.logger.warning('Pay Attention!! the `split_to` is set to 0. If you catch an OOM error in this case, ' + \
+                                'you need to increase it \n\t\t\tuntil the error disappears. 
For example, ' + \
+                                'you can append it in the command line such as `--split_to=5`')

         # define layers and loss
         # construct source and destination item embedding matrix
diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py
index 655601d6d..f7da7e4ae 100644
--- a/recbole/trainer/trainer.py
+++ b/recbole/trainer/trainer.py
@@ -116,9 +116,9 @@ def _build_optimizer(self, params):
         elif self.learner.lower() == 'sparse_adam':
             optimizer = optim.SparseAdam(params, lr=self.learning_rate)
             if self.weight_decay > 0:
-                self.logger.warning('\033[1;33mSparse Adam cannot argument received argument [{weight_decay}]\033[0m')
+                self.logger.warning('Sparse Adam does not support the argument [weight_decay]')
         else:
-            self.logger.warning('\033[1;33mReceived unrecognized optimizer, set default Adam optimizer\033[0m')
+            self.logger.warning('Received unrecognized optimizer, set default Adam optimizer')
             optimizer = optim.Adam(params, lr=self.learning_rate)
         return optimizer
@@ -213,8 +213,8 @@ def resume_checkpoint(self, resume_file):
         # load architecture params from checkpoint
         if checkpoint['config']['model'].lower() != self.config['model'].lower():
             self.logger.warning(
-                '\033[1;33mArchitecture configuration given in config file is different from that of checkpoint.\33[0m '
-                '\033[1;33mThis may yield an exception while state_dict is being loaded.\033[0m'
+                'Architecture configuration given in config file is different from that of checkpoint. '
+                'This may yield an exception while state_dict is being loaded.'
             )
         self.model.load_state_dict(checkpoint['state_dict'])
diff --git a/recbole/utils/logger.py b/recbole/utils/logger.py
index 76559710f..96f5d3025 100644
--- a/recbole/utils/logger.py
+++ b/recbole/utils/logger.py
@@ -10,9 +10,16 @@
 import logging
 import os
+import colorlog

 from recbole.utils.utils import get_local_time, ensure_dir

+log_colors_config = {
+    'DEBUG': 'cyan',
+    'WARNING': 'yellow',
+    'ERROR': 'red',
+    'CRITICAL': 'red',
+}

 def init_logger(config):
     """
@@ -36,13 +43,13 @@ def init_logger(config):
     logfilepath = os.path.join(LOGROOT, logfilename)

-    filefmt = "%(asctime)-15s %(levelname)s %(message)s"
+    filefmt = "%(asctime)-15s %(levelname)s %(message)s"
     filedatefmt = "%a %d %b %Y %H:%M:%S"
     fileformatter = logging.Formatter(filefmt, filedatefmt)

-    sfmt = "%(asctime)-15s %(levelname)s %(message)s"
+    sfmt = "%(log_color)s%(asctime)-15s %(levelname)s %(message)s"
     sdatefmt = "%d %b %H:%M"
-    sformatter = logging.Formatter(sfmt, sdatefmt)
+    sformatter = colorlog.ColoredFormatter(sfmt, sdatefmt, log_colors=log_colors_config)
     if config['state'] is None or config['state'].lower() == 'info':
         level = logging.INFO
     elif config['state'].lower() == 'debug':
From 996bcb3a753f3ed6d757fc08753a7f97d9c2e28b Mon Sep 17 00:00:00 2001
From: Guan-JW <15692276873@163.com>
Date: Sun, 7 Mar 2021 22:53:05 +0800
Subject: [PATCH 06/16] add colorlog

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 46f740e11..dcbae1f01 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ pandas>=1.0.5
 tqdm>=4.48.2
 scikit_learn>=0.23.2
 pyyaml>=5.1.0
+colorlog=4.7.2
\ No newline at end of file
From 5fcb9a2cd5769af561bd47af547f5ff62b3f1ed5 Mon Sep 17 00:00:00 2001
From: Guan-JW <15692276873@163.com>
Date: Sun, 7 Mar 2021 23:54:52 +0800
Subject: [PATCH 07/16] add requirement

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index dcbae1f01..faf49dc2d 100644
--- a/requirements.txt
+++ 
b/requirements.txt @@ -7,4 +7,4 @@ pandas>=1.0.5 tqdm>=4.48.2 scikit_learn>=0.23.2 pyyaml>=5.1.0 -colorlog=4.7.2 \ No newline at end of file +colorlog \ No newline at end of file From ceabdba53041da76cabb6eb3b0023b6b856f3037 Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Mon, 8 Mar 2021 10:05:07 +0800 Subject: [PATCH 08/16] change color --- recbole/utils/logger.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/recbole/utils/logger.py b/recbole/utils/logger.py index 96f5d3025..f3afbd3c9 100644 --- a/recbole/utils/logger.py +++ b/recbole/utils/logger.py @@ -3,6 +3,11 @@ # @Author : Zihan Lin # @Email : linzihan.super@foxmail.com +# UPDATE +# @Time : 2021/3/7 +# @Author : Jiawei Guan +# @Email : guanjw@ruc.edu.cn + """ recbole.utils.logger ############################### From 916bfde662b8faa4b09f7d98a5e1b0fcce9a01b3 Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Mon, 8 Mar 2021 10:12:30 +0800 Subject: [PATCH 09/16] change color --- recbole/data/dataset/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recbole/data/dataset/dataset.py b/recbole/data/dataset/dataset.py index efa5a6abf..7114585dd 100644 --- a/recbole/data/dataset/dataset.py +++ b/recbole/data/dataset/dataset.py @@ -505,7 +505,7 @@ def _preload_weight_matrix(self): matrix[pid] = prow[:max_len] else: self.logger.warning( - f'Field [{preload_value_field}] with type [{value_ftype}] is not `float` or `float_seq`, \033[0m' + f'Field [{preload_value_field}] with type [{value_ftype}] is not `float` or `float_seq`, ' f'which will not be handled by preload matrix.' ) continue From 5a0d650dae10aa21c28f20f081cf9605a20313a6 Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Mon, 8 Mar 2021 10:14:05 +0800 Subject: [PATCH 10/16] change color --- recbole/data/dataset/sequential_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recbole/data/dataset/sequential_dataset.py b/recbole/data/dataset/sequential_dataset.py index 428ea3886..60aaa83f5 100644 --- a/recbole/data/dataset/sequential_dataset.py +++ b/recbole/data/dataset/sequential_dataset.py @@ -124,7 +124,7 @@ def inter_matrix(self, form='coo', value_field=None): raise ValueError('\033[1;31mdataset does not exist uid/iid, thus can not converted to sparse matrix.\033[0m') self.logger.warning('Load interaction matrix may lead to label leakage from testing phase, this implementation ' - 'monly provides the interactions corresponding to specific phase') + 'only provides the interactions corresponding to specific phase') local_inter_feat = self.inter_feat[self.uid_list] return self._create_sparse_matrix(local_inter_feat, self.uid_field, self.iid_field, form, value_field) From 33e1acbf677755679c4e86d9393ade57d7b4c2c8 Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Mon, 8 Mar 2021 10:15:28 +0800 Subject: [PATCH 11/16] change color --- recbole/data/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recbole/data/utils.py b/recbole/data/utils.py index 794974604..f7b3e965f 100644 --- a/recbole/data/utils.py +++ b/recbole/data/utils.py @@ -301,4 +301,4 @@ def decorator(f): return decorator -dlapi = DLFriendlyAPI() \ No newline at end of file +dlapi = DLFriendlyAPI() From f13192e08be0c900e07303bd683b08235df2cb1f Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Mon, 8 Mar 2021 10:19:23 +0800 Subject: [PATCH 12/16] change color --- recbole/trainer/hyper_tuning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/recbole/trainer/hyper_tuning.py b/recbole/trainer/hyper_tuning.py index 017cbf243..4853037e9 100644 --- a/recbole/trainer/hyper_tuning.py +++ b/recbole/trainer/hyper_tuning.py @@ -76,7 +76,7 @@ def _validate_space_exhaustive_search(space): if node.name in implicit_stochastic_symbols: if node.name not in supported_stochastic_symbols: raise ExhaustiveSearchError( - '\033[1;31mExhaustive search is only possible with the following stochastic symbols: ' + '\033[1;31mExhaustive search is only possible with the following stochastic symbols: \033[0m' '\033[1;31m' + ', '.join(supported_stochastic_symbols) ) From cf90ae32325a5d0a82ea9502ee040144bb1ed105 Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Mon, 8 Mar 2021 23:02:12 +0800 Subject: [PATCH 13/16] add set_color --- recbole/config/configurator.py | 29 +++-- recbole/config/eval_setting.py | 50 +++---- .../data/dataloader/abstract_dataloader.py | 8 +- recbole/data/dataloader/general_dataloader.py | 4 +- .../data/dataloader/knowledge_dataloader.py | 6 +- recbole/data/dataloader/neg_sample_mixin.py | 18 +-- .../data/dataloader/sequential_dataloader.py | 2 +- recbole/data/dataset/dataset.py | 123 +++++++++--------- recbole/data/dataset/kg_dataset.py | 25 ++-- recbole/data/dataset/sequential_dataset.py | 14 +- recbole/data/dataset/social_dataset.py | 10 +- recbole/data/interaction.py | 24 ++-- recbole/data/utils.py | 35 +++-- recbole/evaluator/abstract_evaluator.py | 4 +- recbole/evaluator/evaluators.py | 6 +- recbole/evaluator/proxy_evaluator.py | 4 +- recbole/evaluator/utils.py | 2 +- recbole/model/abstract_recommender.py | 5 +- recbole/model/general_recommender/cdae.py | 6 +- recbole/model/general_recommender/dmf.py | 2 +- recbole/model/general_recommender/nais.py | 2 +- recbole/model/general_recommender/neumf.py | 2 +- recbole/model/general_recommender/nncf.py | 4 +- recbole/model/layers.py | 12 +- .../model/sequential_recommender/bert4rec.py | 4 +- recbole/model/sequential_recommender/caser.py | 2 +- recbole/model/sequential_recommender/fdsa.py | 2 +- .../model/sequential_recommender/fossil.py | 2 +- recbole/model/sequential_recommender/gcsan.py | 2 +- .../model/sequential_recommender/gru4rec.py | 2 +- .../model/sequential_recommender/gru4recf.py | 2 +- .../model/sequential_recommender/gru4reckg.py | 2 +- recbole/model/sequential_recommender/hgn.py | 4 +- recbole/model/sequential_recommender/hrm.py | 2 +- recbole/model/sequential_recommender/ksr.py | 2 +- recbole/model/sequential_recommender/narm.py | 2 +- .../model/sequential_recommender/nextitnet.py | 2 +- recbole/model/sequential_recommender/npe.py | 2 +- recbole/model/sequential_recommender/s3rec.py | 2 +- .../model/sequential_recommender/sasrec.py | 2 +- .../model/sequential_recommender/sasrecf.py | 2 +- recbole/model/sequential_recommender/shan.py | 2 +- recbole/model/sequential_recommender/srgnn.py | 2 +- recbole/model/sequential_recommender/stamp.py | 2 +- recbole/quick_start/quick_start.py | 5 +- recbole/sampler/sampler.py | 32 ++--- recbole/trainer/hyper_tuning.py | 12 +- recbole/trainer/trainer.py | 43 +++--- recbole/utils/utils.py | 22 +++- 49 files changed, 297 insertions(+), 258 deletions(-) diff --git a/recbole/config/configurator.py b/recbole/config/configurator.py index dd444c31a..e6c3fe997 100644 --- a/recbole/config/configurator.py +++ b/recbole/config/configurator.py @@ -22,6 +22,7 @@ from recbole.evaluator import group_metrics, individual_metrics from recbole.utils import get_model, Enum, EvaluatorType, ModelType, InputType, \ general_arguments, 
training_arguments, evaluation_arguments, dataset_arguments
+from recbole.utils.utils import set_color


 class Config(object):
@@ -154,7 +155,7 @@ def _load_cmd_line(self):
                 continue
             cmd_arg_name, cmd_arg_value = arg[2:].split("=")
             if cmd_arg_name in cmd_config_dict and cmd_arg_value != cmd_config_dict[cmd_arg_name]:
-                raise SyntaxError("\033[1;31mThere are duplicate commend arg '%s' with different value.\033[0m" % arg)
+                raise SyntaxError("There are duplicate command arg '%s' with different value." % arg)
             else:
                 cmd_config_dict[cmd_arg_name] = cmd_arg_value
         if len(unrecognized_args) > 0:
@@ -177,8 +178,8 @@ def _get_model_and_dataset(self, model, dataset):
                 model = self.external_config_dict['model']
             except KeyError:
                 raise KeyError(
-                    '\033[1;31mmodel need to be specified in at least one of the these ways: \033[0m'
-                    '\033[1;31m[model variable, config file, config dict, command line] \033[0m'
+                    'model needs to be specified in at least one of these ways: '
+                    '[model variable, config file, config dict, command line] '
                 )
         if not isinstance(model, str):
             final_model_class = model
@@ -192,8 +193,8 @@ def _get_model_and_dataset(self, model, dataset):
                 final_dataset = self.external_config_dict['dataset']
             except KeyError:
                 raise KeyError(
-                    '\033[1;31mdataset need to be specified in at least one of the these ways: \033[0m'
-                    '\033[1;31m[dataset variable, config file, config dict, command line] \033[0m'
+                    'dataset needs to be specified in at least one of these ways: '
+                    '[dataset variable, config file, config dict, command line] '
                )
         else:
             final_dataset = dataset
@@ -279,18 +280,18 @@ def _set_default_parameters(self):
         elif self.final_config_dict['loss_type'] in ['BPR']:
             self.final_config_dict['MODEL_INPUT_TYPE'] = InputType.PAIRWISE
         else:
-            raise ValueError('\033[1;31mEither Model has attr \'input_type\',' 'or arg \'loss_type\' should exist in config.\033[0m')
+            raise ValueError('Either Model has attr \'input_type\', ' 'or arg \'loss_type\' should exist in config.')

         eval_type = None
         for metric in self.final_config_dict['metrics']:
             if metric.lower() in individual_metrics:
                 if eval_type is not None and eval_type == EvaluatorType.RANKING:
-                    raise RuntimeError('\033[1;31mRanking metrics and other metrics can not be used at the same time.\033[0m')
+                    raise RuntimeError('Ranking metrics and other metrics can not be used at the same time.')
                 else:
                     eval_type = EvaluatorType.INDIVIDUAL
             if metric.lower() in group_metrics:
                 if eval_type is not None and eval_type == EvaluatorType.INDIVIDUAL:
-                    raise RuntimeError('\033[1;31mRanking metrics and other metrics can not be used at the same time.\033[0m')
+                    raise RuntimeError('Ranking metrics and other metrics can not be used at the same time.')
                 else:
                     eval_type = EvaluatorType.RANKING
         self.final_config_dict['eval_type'] = eval_type
@@ -322,7 +323,7 @@ def _set_train_neg_sample_args(self):

     def __setitem__(self, key, value):
         if not isinstance(key, str):
-            raise TypeError("\033[1;31mindex must be a str.\033[0m")
+            raise TypeError("index must be a str.")
         self.final_config_dict[key] = value

     def __getitem__(self, item):
@@ -333,22 +334,22 @@ def __getitem__(self, item):

     def __contains__(self, key):
         if not isinstance(key, str):
-            raise TypeError("\033[1;31mindex must be a str.\033[0m")
+            raise TypeError("index must be a str.")
         return key in self.final_config_dict

     def __str__(self):
         args_info = '\n'
         for category in self.parameters:
-            args_info += '\033[1;35m' + category + ' Hyper Parameters: \033[0m\n'
+            args_info += set_color(category + ' Hyper Parameters:\n', 'pink')
             args_info += 
'\n'.join([
-                "\033[0;36m{}\033[0m = \033[33m{}\033[0m".format(arg, value) for arg, value in self.final_config_dict.items()
+                (set_color("{}", 'cyan') + " =" + set_color(" {}", 'yellow')).format(arg, value) for arg, value in self.final_config_dict.items()
                 if arg in self.parameters[category]
             ])
             args_info += '\n\n'

-        args_info += '\033[1;35mOther Hyper Parameters: \033[0m\n'
+        args_info += set_color('Other Hyper Parameters: \n', 'pink')
         args_info += '\n'.join([
-            "\033[0;36m{}\033[0m = \033[33m{}\033[0m".format(arg, value) for arg, value in self.final_config_dict.items()
+            (set_color("{}", 'cyan') + " = " + set_color("{}", 'yellow')).format(arg, value) for arg, value in self.final_config_dict.items()
             if arg not in {_ for args in self.parameters.values() for _ in args}.union({'model', 'dataset', 'config_files'})
         ])
         args_info += '\n\n'
diff --git a/recbole/config/eval_setting.py b/recbole/config/eval_setting.py
index 7b22bcaad..da11806c1 100644
--- a/recbole/config/eval_setting.py
+++ b/recbole/config/eval_setting.py
@@ -12,6 +12,8 @@
 ################################
 """

+from recbole.utils.utils import set_color
+

 class EvalSetting(object):
     """Class containing settings about model evaluation.
@@ -82,7 +84,7 @@ def __init__(self, config):
             self.set_ordering_and_splitting(self.es_str[0])
             if len(self.es_str) > 1:
                 if getattr(self, self.es_str[1], None) == None:
-                    raise ValueError('\033[1;31mIncorrect setting of negative sampling.')
+                    raise ValueError('Incorrect setting of negative sampling.')
                 getattr(self, self.es_str[1])()
         presetting_args = ['group_field', 'ordering_args', 'split_args', 'neg_sample_args']
         for args in presetting_args:
@@ -90,27 +92,27 @@ def __init__(self, config):
                 setattr(self, args, config[args])

     def __str__(self):
-        info = ['\033[1;35mEvaluation Setting:\033[0m']
+        info = [set_color('Evaluation Setting:', 'pink')]

         if self.group_field:
-            info.append('\033[1;34mGroup by\033[0m {}'.format(self.group_field))
+            info.append(set_color('Group by', 'blue') + ' {}'.format(self.group_field))
         else:
-            info.append('\033[0;33mNo Grouping\033[0m')
+            info.append(set_color('No Grouping', 'yellow'))

         if self.ordering_args is not None and self.ordering_args['strategy'] != 'none':
-            info.append('\033[1;34mOrdering\033[0m: {}'.format(self.ordering_args))
+            info.append(set_color('Ordering', 'blue') + ': {}'.format(self.ordering_args))
         else:
-            info.append('\033[0;33mNo Ordering\033[0m')
+            info.append(set_color('No Ordering', 'yellow'))

         if self.split_args is not None and self.split_args['strategy'] != 'none':
-            info.append('\033[1;34mSplitting\033[0m: {}'.format(self.split_args))
+            info.append(set_color('Splitting', 'blue') + ': {}'.format(self.split_args))
         else:
-            info.append('\033[0;33mNo Splitting\033[0m')
+            info.append(set_color('No Splitting', 'yellow'))

         if self.neg_sample_args is not None and self.neg_sample_args['strategy'] != 'none':
-            info.append('\033[1;34mNegative Sampling\033[0m: {}'.format(self.neg_sample_args))
+            info.append(set_color('Negative Sampling', 'blue') + ': {}'.format(self.neg_sample_args))
         else:
-            info.append('\033[0;33mNo Negative Sampling\033[0m')
+            info.append(set_color('No Negative Sampling', 'yellow'))

         return '\n\t'.join(info)
@@ -159,7 +161,7 @@ def set_ordering(self, strategy='none', **kwargs):
         """
         legal_strategy = {'none', 'shuffle', 'by'}
         if strategy not in legal_strategy:
-            raise ValueError('\033[1;31mOrdering Strategy [{}] should in {}\033[0m'.format(strategy, list(legal_strategy)))
+            raise ValueError('Ordering Strategy [{}] should be in {}'.format(strategy,
 list(legal_strategy)))
         self.ordering_args = {'strategy': strategy}
         self.ordering_args.update(kwargs)
@@ -208,9 +210,9 @@ def set_splitting(self, strategy='none', **kwargs):
         """
         legal_strategy = {'none', 'by_ratio', 'by_value', 'loo'}
         if strategy not in legal_strategy:
-            raise ValueError('\033[1;31mSplit Strategy [{}] should in {}\033[0m'.format(strategy, list(legal_strategy)))
+            raise ValueError('Split Strategy [{}] should be in {}'.format(strategy, list(legal_strategy)))
         if strategy == 'loo' and self.group_field is None:
-            raise ValueError('\033[1;31mLeave-One-Out request group firstly\033[0m')
+            raise ValueError('Leave-One-Out requires grouping first')
         self.split_args = {'strategy': strategy}
         self.split_args.update(kwargs)
@@ -225,7 +227,7 @@ def leave_one_out(self, leave_one_num=1):
             E.g. ``leave_one_num = 2`` if you have one validation dataset and one test dataset.
         """
         if self.group_field is None:
-            raise ValueError('\033[1;31mLeave one out request grouped dataset, please set group field.\033[0m')
+            raise ValueError('Leave one out requires a grouped dataset, please set the group field.')
         self.set_splitting(strategy='loo', leave_one_num=leave_one_num)

     def split_by_ratio(self, ratios):
@@ -236,13 +238,13 @@ def split_by_ratio(self, ratios):
             No need to normalize. It's ok with either `[0.8, 0.1, 0.1]`, `[8, 1, 1]` or `[56, 7, 7]`
         """
         if not isinstance(ratios, list):
-            raise ValueError('\033[1;31mratios [{}] should be list\033[0m'.format(ratios))
+            raise ValueError('ratios [{}] should be a list'.format(ratios))
         self.set_splitting(strategy='by_ratio', ratios=ratios)

     def _split_by_value(self, field, values, ascending=True):
-        raise NotImplementedError('\033[1;31mSplit by value has not been implemented.\033[0m')
+        raise NotImplementedError('Split by value has not been implemented.')
         if not isinstance(field, str):
-            raise ValueError('\033[1;31mfield [{}] should be str\033[0m'.format(field))
+            raise ValueError('field [{}] should be a str'.format(field))
         if not isinstance(values, list):
             values = [values]
         values.sort(reverse=(not ascending))
@@ -262,9 +264,9 @@ def set_neg_sampling(self, strategy='none', distribution='uniform', **kwargs):
         """
         legal_strategy = {'none', 'full', 'by'}
         if strategy not in legal_strategy:
-            raise ValueError('\033[1;31mNegative Sampling Strategy [{}] should in {}\033[0m'.format(strategy, list(legal_strategy)))
+            raise ValueError('Negative Sampling Strategy [{}] should be in {}'.format(strategy, list(legal_strategy)))
         if strategy == 'full' and distribution != 'uniform':
-            raise ValueError('\033[1;31mFull Sort can not be sampled by distribution [{}]\033[0m'.format(distribution))
+            raise ValueError('Full Sort can not be sampled by distribution [{}]'.format(distribution))
         self.neg_sample_args = {'strategy': strategy, 'distribution': distribution}
         self.neg_sample_args.update(kwargs)
@@ -285,7 +287,7 @@ def set_ordering_and_splitting(self, es_str):
         """
         args = es_str.split('_')
         if len(args) != 2:
-            raise ValueError(f'\033[1;31m`{es_str}` is invalid eval_setting.\0mm[0m')
+            raise ValueError(f'`{es_str}` is invalid eval_setting.')
         ordering_args, split_args = args

         if self.config['group_by_user']:
@@ -296,20 +298,20 @@ def set_ordering_and_splitting(self, es_str):
         elif ordering_args == 'TO':
             self.temporal_ordering()
         else:
-            raise NotImplementedError(f'\033[1;31mOrdering args `{ordering_args}` is not implemented.\033[0m')
+            raise NotImplementedError(f'Ordering args `{ordering_args}` is not implemented.')

         if split_args == 'RS':
             ratios = self.config['split_ratio']
             if ratios is None:
-                raise 
ValueError('\033[1;31m`ratios` should be set if `RS` is set.\033[0m') + raise ValueError('`ratios` should be set if `RS` is set.') self.split_by_ratio(ratios) elif split_args == 'LS': leave_one_num = self.config['leave_one_num'] if leave_one_num is None: - raise ValueError('\033[1;31m`leave_one_num` should be set if `LS` is set.\033[0m') + raise ValueError('`leave_one_num` should be set if `LS` is set.') self.leave_one_out(leave_one_num=leave_one_num) else: - raise NotImplementedError(f'\033[1;31mSplit args `{split_args}` is not implemented.\033[0m') + raise NotImplementedError(f'Split args `{split_args}` is not implemented.') def RO_RS(self, ratios=(0.8, 0.1, 0.1), group_by_user=True): """Preset about Random Ordering and Ratio-based Splitting. diff --git a/recbole/data/dataloader/abstract_dataloader.py b/recbole/data/dataloader/abstract_dataloader.py index 4c980252a..73e642472 100644 --- a/recbole/data/dataloader/abstract_dataloader.py +++ b/recbole/data/dataloader/abstract_dataloader.py @@ -96,12 +96,12 @@ def __next__(self): @property def pr_end(self): """This property marks the end of dataloader.pr which is used in :meth:`__next__()`.""" - raise NotImplementedError('\033[1;31mMethod [pr_end] should be implemented\033[0m') + raise NotImplementedError('Method [pr_end] should be implemented') def _shuffle(self): """Shuffle the order of data, and it will be called by :meth:`__iter__()` if self.shuffle is True. """ - raise NotImplementedError('\033[1;31mMethod [shuffle] should be implemented.\033[0m') + raise NotImplementedError('Method [shuffle] should be implemented.') def _next_batch_data(self): """Assemble next batch of data in form of Interaction, and return these data. @@ -109,7 +109,7 @@ def _next_batch_data(self): Returns: Interaction: The next batch of data. """ - raise NotImplementedError('\033[1;31mMethod [next_batch_data] should be implemented.\033[0m') + raise NotImplementedError('Method [next_batch_data] should be implemented.') def set_batch_size(self, batch_size): """Reset the batch_size of the dataloader, but it can't be called when dataloader is being iterated. @@ -118,7 +118,7 @@ def set_batch_size(self, batch_size): batch_size (int): the new batch_size of dataloader. 
""" if self.pr != 0: - raise PermissionError('\033[1;31mCannot change dataloader\'s batch_size while iteration\033[0m') + raise PermissionError('Cannot change dataloader\'s batch_size while iteration') if self.batch_size != batch_size: self.batch_size = batch_size self.logger.warning(f'Batch size is changed to {batch_size}.') diff --git a/recbole/data/dataloader/general_dataloader.py b/recbole/data/dataloader/general_dataloader.py index 559677b55..818571586 100644 --- a/recbole/data/dataloader/general_dataloader.py +++ b/recbole/data/dataloader/general_dataloader.py @@ -207,7 +207,7 @@ def __init__( self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False ): if neg_sample_args['strategy'] != 'full': - raise ValueError('\033[1;31mneg_sample strategy in GeneralFullDataLoader() should be `full`\033[0m') + raise ValueError('neg_sample strategy in GeneralFullDataLoader() should be `full`') uid_field = dataset.uid_field iid_field = dataset.iid_field @@ -259,7 +259,7 @@ def pr_end(self): return len(self.uid_list) def _shuffle(self): - self.logger.warnning('\033[1;31mGeneralFullDataLoader can\'t shuffle\033[0m') + self.logger.warnning('GeneralFullDataLoader can\'t shuffle') def _next_batch_data(self): user_df = self.user_df[self.pr:self.pr + self.step] diff --git a/recbole/data/dataloader/knowledge_dataloader.py b/recbole/data/dataloader/knowledge_dataloader.py index bfa5f80bb..6b6bb00ac 100644 --- a/recbole/data/dataloader/knowledge_dataloader.py +++ b/recbole/data/dataloader/knowledge_dataloader.py @@ -145,8 +145,8 @@ def __init__( def __iter__(self): if self.state is None: raise ValueError( - '\033[1;31mThe dataloader\'s state must be set when using the kg based dataloader, \033[0m' - '\033[1;31myou should call set_mode() before __iter__()\033[0m' + 'The dataloader\'s state must be set when using the kg based dataloader, ' + 'you should call set_mode() before __iter__()' ) if self.state == KGDataLoaderState.KG: return self.kg_dataloader.__iter__() @@ -202,5 +202,5 @@ def set_mode(self, state): state (KGDataLoaderState): the state of :class:`KnowledgeBasedDataLoader`. """ if state not in set(KGDataLoaderState): - raise NotImplementedError(f'\033[1;31mKg data loader has no state named [{self.state}].\033[0m') + raise NotImplementedError(f'Kg data loader has no state named [{self.state}].') self.state = state diff --git a/recbole/data/dataloader/neg_sample_mixin.py b/recbole/data/dataloader/neg_sample_mixin.py index 7eb983d1c..e21d614ac 100644 --- a/recbole/data/dataloader/neg_sample_mixin.py +++ b/recbole/data/dataloader/neg_sample_mixin.py @@ -37,7 +37,7 @@ def __init__( self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False ): if neg_sample_args['strategy'] not in ['by', 'full']: - raise ValueError(f"\033[1;31mNeg_sample strategy [{neg_sample_args['strategy']}] has not been implemented.\033[0m") + raise ValueError(f"Neg_sample strategy [{neg_sample_args['strategy']}] has not been implemented.") self.sampler = sampler self.neg_sample_args = neg_sample_args @@ -52,7 +52,7 @@ def setup(self): def _batch_size_adaptation(self): """Adjust the batch size to ensure that each positive and negative interaction can be in a batch. 
""" - raise NotImplementedError('\033[1;31mMethod [batch_size_adaptation] should be implemented.\033[0m') + raise NotImplementedError('Method [batch_size_adaptation] should be implemented.') def _neg_sampling(self, inter_feat): """ @@ -62,21 +62,21 @@ def _neg_sampling(self, inter_feat): Returns: The user-item interaction table with negative example. """ - raise NotImplementedError('\033[1;31mMethod [neg_sampling] should be implemented.\033[0m') + raise NotImplementedError('Method [neg_sampling] should be implemented.') def get_pos_len_list(self): """ Returns: numpy.ndarray: Number of positive item for each user in a training/evaluating epoch. """ - raise NotImplementedError('\033[1;31mMethod [get_pos_len_list] should be implemented.\033[0m') + raise NotImplementedError('Method [get_pos_len_list] should be implemented.') def get_user_len_list(self): """ Returns: numpy.ndarray: Number of all item for each user in a training/evaluating epoch. """ - raise NotImplementedError('\033[1;31mMethod [get_user_len_list] should be implemented.\033[0m') + raise NotImplementedError('Method [get_user_len_list] should be implemented.') class NegSampleByMixin(NegSampleMixin): @@ -99,7 +99,7 @@ def __init__( self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False ): if neg_sample_args['strategy'] != 'by': - raise ValueError('\033[1;31mneg_sample strategy in GeneralInteractionBasedDataLoader() should be `by`\033[0m') + raise ValueError('neg_sample strategy in GeneralInteractionBasedDataLoader() should be `by`') self.user_inter_in_one_batch = (sampler.phase != 'train') and (config['eval_type'] != EvaluatorType.INDIVIDUAL) self.neg_sample_by = neg_sample_args['by'] @@ -123,7 +123,7 @@ def __init__( neg_item_feat_col = self.neg_prefix + item_feat_col dataset.copy_field_property(neg_item_feat_col, item_feat_col) else: - raise ValueError(f'\033[1;31m`neg sampling by` with dl_format [{dl_format}] not been implemented.\033[0m') + raise ValueError(f'`neg sampling by` with dl_format [{dl_format}] not been implemented.') super().__init__( config, dataset, sampler, neg_sample_args, batch_size=batch_size, dl_format=dl_format, shuffle=shuffle @@ -132,9 +132,9 @@ def __init__( def _neg_sample_by_pair_wise_sampling(self, *args): """Pair-wise sampling. """ - raise NotImplementedError('\033[1;31mMethod [neg_sample_by_pair_wise_sampling] should be implemented.\033[0m') + raise NotImplementedError('Method [neg_sample_by_pair_wise_sampling] should be implemented.') def _neg_sample_by_point_wise_sampling(self, *args): """Point-wise sampling. 
""" - raise NotImplementedError('\033[1;31mMethod [neg_sample_by_point_wise_sampling] should be implemented.\033[0m') + raise NotImplementedError('Method [neg_sample_by_point_wise_sampling] should be implemented.') diff --git a/recbole/data/dataloader/sequential_dataloader.py b/recbole/data/dataloader/sequential_dataloader.py index 7ee37b05e..51d3a845d 100644 --- a/recbole/data/dataloader/sequential_dataloader.py +++ b/recbole/data/dataloader/sequential_dataloader.py @@ -264,7 +264,7 @@ def _neg_sampling(self, inter_feat): pass def _shuffle(self): - self.logger.warnning('\033[1;31mSequentialFullDataLoader can\'t shuffle\033[0m') + self.logger.warnning('SequentialFullDataLoader can\'t shuffle') def _next_batch_data(self): interaction = super()._next_batch_data() diff --git a/recbole/data/dataset/dataset.py b/recbole/data/dataset/dataset.py index 7114585dd..f5aa9a1ec 100644 --- a/recbole/data/dataset/dataset.py +++ b/recbole/data/dataset/dataset.py @@ -27,6 +27,7 @@ from recbole.data.interaction import Interaction from recbole.data.utils import dlapi from recbole.utils import FeatureSource, FeatureType +from recbole.utils.utils import set_color class Dataset(object): @@ -105,7 +106,7 @@ def _from_scratch(self): """Load dataset from scratch. Initialize attributes firstly, then load data from atomic files, pre-process the dataset lastly. """ - self.logger.debug(f'\033[0;32mLoading {self.__class__} from scratch.\033[0m') + self.logger.debug(set_color('Loading {self.__class__} from scratch.', 'green')) self._get_preset() self._get_field_from_config() @@ -135,11 +136,11 @@ def _get_field_from_config(self): if (self.uid_field is None) ^ (self.iid_field is None): raise ValueError( - '\033[1;31mUSER_ID_FIELD and ITEM_ID_FIELD need to be set at the same time or not set at the same time.\033[0m' + 'USER_ID_FIELD and ITEM_ID_FIELD need to be set at the same time or not set at the same time.' ) - self.logger.debug(f'\033[0;34muid_field\033[0m: {self.uid_field}') - self.logger.debug(f'\033[0;34miid_field\033[0m: {self.iid_field}') + self.logger.debug(set_color('uid_field', 'blue') + ': {self.uid_field}') + self.logger.debug(set_color('iid_field', 'blue') + ': {self.iid_field}') def _data_processing(self): """Data preprocessing, including: @@ -208,10 +209,10 @@ def _restore_saved_dataset(self, saved_dataset): Args: saved_dataset (str): path for the saved dataset. 
""" - self.logger.debug(f'\033[0;32mRestoring dataset from [{saved_dataset}].\033[0m') + self.logger.debug(set_color('Restoring dataset from [{saved_dataset}].', 'green')) if (saved_dataset is None) or (not os.path.isdir(saved_dataset)): - raise ValueError(f'\033[1;31mFilepath [{saved_dataset}] need to be a dir.\033[0m') + raise ValueError(f'Filepath [{saved_dataset}] need to be a dir.') with open(os.path.join(saved_dataset, 'basic-info.json')) as file: basic_info = json.load(file) @@ -261,7 +262,7 @@ def _load_inter_feat(self, token, dataset_path): if self.benchmark_filename_list is None: inter_feat_path = os.path.join(dataset_path, f'{token}.inter') if not os.path.isfile(inter_feat_path): - raise ValueError(f'\033[1;31mFile {inter_feat_path} not exist.\033[0m') + raise ValueError(f'File {inter_feat_path} not exist.') inter_feat = self._load_feat(inter_feat_path, FeatureSource.INTERACTION) self.logger.debug(f'Interaction feature loaded successfully from [{inter_feat_path}].') @@ -276,7 +277,7 @@ def _load_inter_feat(self, token, dataset_path): sub_inter_feats.append(temp) sub_inter_lens.append(len(temp)) else: - raise ValueError(f'\033[1;31mFile {file_path} not exist.\033[0m') + raise ValueError(f'File {file_path} not exist.') inter_feat = pd.concat(sub_inter_feats) self.inter_feat, self.file_size_list = inter_feat, sub_inter_lens @@ -306,9 +307,9 @@ def _load_user_or_item_feat(self, token, dataset_path, source, field_name): field = getattr(self, field_name, None) if feat is not None and field is None: - raise ValueError(f'\033[1;31m{field_name} must be exist if {source.value}_feat exist.\033[0m') + raise ValueError(f'{field_name} must be exist if {source.value}_feat exist.') if feat is not None and field not in feat: - raise ValueError(f'\033[1;31m{field_name} must be loaded if {source.value}_feat is loaded.\033[0m') + raise ValueError(f'{field_name} must be loaded if {source.value}_feat is loaded.') if field in self.field2source: self.field2source[field] = FeatureSource(source.value + '_id') @@ -329,12 +330,12 @@ def _load_additional_feat(self, token, dataset_path): return for suf in self.config['additional_feat_suffix']: if hasattr(self, f'{suf}_feat'): - raise ValueError(f'\033[1;31m{suf}_feat already exist.\033[0m') + raise ValueError(f'{suf}_feat already exist.') feat_path = os.path.join(dataset_path, f'{token}.{suf}') if os.path.isfile(feat_path): feat = self._load_feat(feat_path, suf) else: - raise ValueError(f'\033[1;31mAdditional feature file [{feat_path}] not found.\033[0m') + raise ValueError(f'Additional feature file [{feat_path}] not found.') setattr(self, f'{suf}_feat', feat) def _get_load_and_unload_col(self, source): @@ -364,11 +365,11 @@ def _get_load_and_unload_col(self, source): unload_col = None if load_col and unload_col: - raise ValueError(f'\033[1;31mload_col [{load_col}] and unload_col [{unload_col}] can not be set the same time.\033[0m') + raise ValueError(f'load_col [{load_col}] and unload_col [{unload_col}] can not be set the same time.') - self.logger.debug(f'\033[0;35m[{source}]: \033[0m') - self.logger.debug(f'\t \033[0;34mload_col\033[0m: [{load_col}]') - self.logger.debug(f'\t \033[0;34munload_col\033[0m: [{unload_col}]') + self.logger.debug(set_color('[{source}]: ', 'pink')) + self.logger.debug(set_color('\t load_col', 'blue') + ': [{load_col}]') + self.logger.debug(set_color('\t unload_col', 'blue') + ': [{unload_col}]') return load_col, unload_col def _load_feat(self, filepath, source): @@ -388,7 +389,7 @@ def _load_feat(self, filepath, source): Their 
length is limited only after calling :meth:`~_dict_to_interaction` or :meth:`~_dataframe_to_interaction`
         """
-        self.logger.debug(f'\033[0;32mLoading feature from [{filepath}] (source: [{source}]).\033[0m')
+        self.logger.debug(set_color(f'Loading feature from [{filepath}] (source: [{source}]).', 'green'))

         load_col, unload_col = self._get_load_and_unload_col(source)
         if load_col == set():
@@ -405,7 +406,7 @@ def _load_feat(self, filepath, source):
             try:
                 ftype = FeatureType(ftype)
             except ValueError:
-                raise ValueError(f'\033[1;31mType {ftype} from field {field} is not supported.\033[0m')
+                raise ValueError(f'Type {ftype} from field {field} is not supported.')
             if load_col is not None and field not in load_col:
                 continue
             if unload_col is not None and field in unload_col:
                 continue
@@ -446,11 +447,11 @@ def _user_item_feat_preparation(self):
         if self.user_feat is not None:
             new_user_df = pd.DataFrame({self.uid_field: np.arange(self.user_num)})
             self.user_feat = pd.merge(new_user_df, self.user_feat, on=self.uid_field, how='left')
-            self.logger.debug('\033[0;32mordering user features by user id.\033[0m')
+            self.logger.debug(set_color('ordering user features by user id.', 'green'))
         if self.item_feat is not None:
             new_item_df = pd.DataFrame({self.iid_field: np.arange(self.item_num)})
             self.item_feat = pd.merge(new_item_df, self.item_feat, on=self.iid_field, how='left')
-            self.logger.debug('\033[0;32mordering item features by user id.\033[0m')
+            self.logger.debug(set_color('ordering item features by item id.', 'green'))

     def _preload_weight_matrix(self):
         """Transfer preload weight features into :class:`numpy.ndarray` with shape ``[id_token_length]``
@@ -465,16 +466,16 @@ def _preload_weight_matrix(self):
         for preload_id_field in preload_fields:
             preload_value_field = preload_fields[preload_id_field]
             if preload_id_field not in self.field2source:
-                raise ValueError(f'\033[1;31mPreload id field [{preload_id_field}] not exist.\033[0m')
+                raise ValueError(f'Preload id field [{preload_id_field}] does not exist.')
             if preload_value_field not in self.field2source:
-                raise ValueError(f'\033[1;31mPreload value field [{preload_value_field}] not exist.\033[0m')
+                raise ValueError(f'Preload value field [{preload_value_field}] does not exist.')
             pid_source = self.field2source[preload_id_field]
             pv_source = self.field2source[preload_value_field]
             if pid_source != pv_source:
                 raise ValueError(
-                    f'\033[1;31mPreload id field [{preload_id_field}] is from source [{pid_source}],\033[0m'
-                    f'\033[1;31mwhile preload value field [{preload_value_field}] is from source [{pv_source}], \033[0m'
-                    f'\033[1;31mwhich should be the same.\033[0m'
+                    f'Preload id field [{preload_id_field}] is from source [{pid_source}], '
+                    f'while preload value field [{preload_value_field}] is from source [{pv_source}], '
+                    f'which should be the same.'
                 )
         for feat_name in self.feat_name_list:
             feat = getattr(self, feat_name)
@@ -482,7 +483,7 @@ def _preload_weight_matrix(self):
                 id_ftype = self.field2type[preload_id_field]
                 if id_ftype != FeatureType.TOKEN:
                     raise ValueError(
-                        f'\033[1;31mPreload id field [{preload_id_field}] should be type token, but is [{id_ftype}].\033[0m'
+                        f'Preload id field [{preload_id_field}] should be type token, but is [{id_ftype}].'
                     )
                 value_ftype = self.field2type[preload_value_field]
                 token_num = self.num(preload_id_field)
@@ -520,7 +521,7 @@ def _fill_nan(self):
         For fields with type :obj:`~recbole.utils.enum_type.FeatureType.FLOAT`, missing value will be filled by
         the average of original data. 
""" - self.logger.debug('\033[0;32mFilling nan\033[0m') + self.logger.debug(set_color('Filling nan', 'green')) for feat_name in self.feat_name_list: feat = getattr(self, feat_name) @@ -545,14 +546,14 @@ def _normalize(self): Only float-like fields can be normalized. """ if self.config['normalize_field'] is not None and self.config['normalize_all'] is True: - raise ValueError('\033[1;31mNormalize_field and normalize_all can\'t be set at the same time.\033[0m') + raise ValueError('Normalize_field and normalize_all can\'t be set at the same time.') if self.config['normalize_field']: fields = self.config['normalize_field'] for field in fields: ftype = self.field2type[field] if field not in self.field2type: - raise ValueError(f'\033[1;31mField [{field}] does not exist.\033[0m') + raise ValueError(f'Field [{field}] does not exist.') elif ftype != FeatureType.FLOAT and ftype != FeatureType.FLOAT_SEQ: self.logger.warning(f'{field} is not a FLOAT/FLOAT_SEQ feat, which will not be normalized.') elif self.config['normalize_all']: @@ -560,7 +561,7 @@ def _normalize(self): else: return - self.logger.debug(f'\033[0;34mNormalized fields\033[0m: {fields}') + self.logger.debug(set_color('Normalized fields', 'blue') + ': {fields}') for feat_name in self.feat_name_list: feat = getattr(self, feat_name) @@ -715,7 +716,7 @@ def _get_illegal_ids_by_inter_num(self, field, feat, inter_num, max_num=None, mi Returns: set: illegal ids, whose inter num out of [min_num, max_num] """ - self.logger.debug(f'\033[0;34mget_illegal_ids_by_inter_num\033[0m: field=[{field}], max_num=[{max_num}], min_num=[{min_num}]') + self.logger.debug(set_color('get_illegal_ids_by_inter_num', 'blue') + ': field=[{field}], max_num=[{max_num}], min_num=[{min_num}]') max_num = max_num or np.inf min_num = min_num or -1 @@ -744,7 +745,7 @@ def _reset_index(self): for feat_name in self.feat_name_list: feat = getattr(self, feat_name) if feat.empty: - raise ValueError('\033[1;31mSome feat is empty, please check the filtering settings.\033[0m') + raise ValueError('Some feat is empty, please check the filtering settings.') feat.reset_index(drop=True, inplace=True) def _drop_by_value(self, val, cmp): @@ -760,13 +761,13 @@ def _drop_by_value(self, val, cmp): if val is None: return [] - self.logger.debug(f'\033[0;34mdrop_by_value\033[0m: val={val}') + self.logger.debug(set_color('drop_by_value', 'blue') + ': val={val}') filter_field = [] for field in val: if field not in self.field2type: - raise ValueError(f'\033[1;31mField [{field}] not defined in dataset.\033[0m') + raise ValueError(f'Field [{field}] not defined in dataset.') if self.field2type[field] not in {FeatureType.FLOAT, FeatureType.FLOAT_SEQ}: - raise ValueError(f'\033[1;31mField [{field}] is not float-like field in dataset, which can\'t be filter.\033[0m') + raise ValueError(f'Field [{field}] is not float-like field in dataset, which can\'t be filter.') for feat_name in self.feat_name_list: feat = getattr(self, feat_name) if field in feat: @@ -826,14 +827,14 @@ def _set_label_by_threshold(self): self.logger.debug(f'Set label by {threshold}.') if len(threshold) != 1: - raise ValueError('\033[1;31mThreshold length should be 1.\033[0m') + raise ValueError('Threshold length should be 1.') self.set_field_property(self.label_field, FeatureType.FLOAT, FeatureSource.INTERACTION, 1) for field, value in threshold.items(): if field in self.inter_feat: self.inter_feat[self.label_field] = (self.inter_feat[field] >= value).astype(int) else: - raise ValueError(f'\033[1;31mField [{field}] not in 
inter_feat.\033[0m')
+                raise ValueError(f'Field [{field}] not in inter_feat.')
             self._del_col(self.inter_feat, field)

     def _get_fields_in_same_space(self):
@@ -858,14 +859,14 @@ def _get_fields_in_same_space(self):
             elif count == 1:
                 continue
             else:
-                raise ValueError(f'\033[1;31mField [{field}] occurred in `fields_in_same_space` more than one time.\033[0m')
+                raise ValueError(f'Field [{field}] occurred in `fields_in_same_space` more than one time.')

         for field_set in fields_in_same_space:
             if self.uid_field in field_set and self.iid_field in field_set:
-                raise ValueError('\033[1;31muid_field and iid_field can\'t in the same ID space\033[0m')
+                raise ValueError('uid_field and iid_field can\'t be in the same ID space')
             for field in field_set:
                 if field not in token_like_fields:
-                    raise ValueError(f'\033[1;31mField [{field}] is not a token-like field.\033[0m')
+                    raise ValueError(f'Field [{field}] is not a token-like field.')

         fields_in_same_space.extend(additional)
         return fields_in_same_space
@@ -908,7 +909,7 @@ def _remap_ID_all(self):
         """Get ``config['fields_in_same_space']`` firstly, and remap each.
         """
         fields_in_same_space = self._get_fields_in_same_space()
-        self.logger.debug(f'\033[0;34mfields_in_same_space\033[0m: {fields_in_same_space}')
+        self.logger.debug(set_color('fields_in_same_space', 'blue') + f': {fields_in_same_space}')
         for field_set in fields_in_same_space:
             remap_list = self._get_remap_list(field_set)
             self._remap(remap_list)
@@ -975,7 +976,7 @@ def num(self, field):
             int: The number of different tokens (``1`` if ``field`` is a float-like field).
         """
         if field not in self.field2type:
-            raise ValueError(f'\033[1;31mField [{field}] not defined in dataset.\033[0m')
+            raise ValueError(f'Field [{field}] not defined in dataset.')
         if self.field2type[field] not in {FeatureType.TOKEN, FeatureType.TOKEN_SEQ}:
             return self.field2seqlen[field]
         else:
@@ -1080,11 +1081,11 @@ def token2id(self, field, tokens):
             if tokens in self.field2token_id[field]:
                 return self.field2token_id[field][tokens]
             else:
-                raise ValueError('\033[1;31mtoken [{}] is not existed\033[0m')
+                raise ValueError(f'token [{tokens}] does not exist')
         elif isinstance(tokens, (list, np.ndarray)):
             return np.array([self.token2id(field, token) for token in tokens])
         else:
-            raise TypeError('\033[1;31mThe type of tokens [{}] is not supported\033[0m')
+            raise TypeError(f'The type of tokens [{tokens}] is not supported')

     @dlapi.set()
     def id2token(self, field, ids):
@@ -1101,9 +1102,9 @@ def id2token(self, field, ids):
             return self.field2id_token[field][ids]
         except IndexError:
             if isinstance(ids, list):
-                raise ValueError(f'\033[1;31m[{ids}] is not a one-dimensional list.\033[0m')
+                raise ValueError(f'[{ids}] is not a one-dimensional list.')
             else:
-                raise ValueError(f'\033[1;31m[{ids}] is not a valid ids.\033[0m')
+                raise ValueError(f'[{ids}] is not a valid id.')

     @property
     @dlapi.set()
@@ -1177,7 +1178,7 @@ def _check_field(self, *field_names):
         """
         for field_name in field_names:
             if getattr(self, field_name, None) is None:
-                raise ValueError(f'\033[1;31m{field_name} isn\'t set.\033[0m')
+                raise ValueError(f'{field_name} isn\'t set.')

     @dlapi.set()
     def join(self, df):
@@ -1206,19 +1207,19 @@ def __repr__(self):
         return self.__str__()

     def __str__(self):
-        info = ['\033[1;35m' + self.dataset_name + '\033[0m']
+        info = [set_color(self.dataset_name, 'pink')]
         if self.uid_field:
             info.extend([
-                f'\033[0;34mThe number of users\033[0m: {self.user_num}', f'\033[0;34mAverage actions of users\033[0m: {self.avg_actions_of_users}'
+                set_color('The number of users', 'blue') + f': 
{self.user_num}', set_color('Average actions of users', 'blue') + f': {self.avg_actions_of_users}'
             ])
         if self.iid_field:
             info.extend([
-                f'\033[0;34mThe number of items\033[0m: {self.item_num}', f'\033[0;34mAverage actions of items\033[0m: {self.avg_actions_of_items}'
+                set_color('The number of items', 'blue') + f': {self.item_num}', set_color('Average actions of items', 'blue') + f': {self.avg_actions_of_items}'
             ])
-        info.append(f'\033[0;34mThe number of inters\033[0m: {self.inter_num}')
+        info.append(set_color('The number of inters', 'blue') + f': {self.inter_num}')
         if self.uid_field and self.iid_field:
-            info.append(f'\033[0;34mThe sparsity of the dataset\033[0m: {self.sparsity * 100}%')
-        info.append(f'\033[0;34mRemain Fields\033[0m: {list(self.field2type)}')
+            info.append(set_color('The sparsity of the dataset', 'blue') + f': {self.sparsity * 100}%')
+        info.append(set_color('Remain Fields', 'blue') + f': {list(self.field2type)}')
         return '\n'.join(info)

     def copy(self, new_inter_feat):
@@ -1349,7 +1350,7 @@ def leave_one_out(self, group_by, leave_one_num=1):
         """
         self.logger.debug(f'leave one out, group_by=[{group_by}], leave_one_num=[{leave_one_num}]')
         if group_by is None:
-            raise ValueError('\033[1;31mleave one out strategy require a group field\033[0m')
+            raise ValueError('leave one out strategy requires a group field')

         grouped_inter_feat_index = self._grouped_index(self.inter_feat[group_by].numpy())
         next_index = self._split_index_by_leave_one_out(grouped_inter_feat_index, leave_one_num)
@@ -1419,7 +1420,7 @@ def save(self, filepath):
             filepath (str): path of saved dir.
         """
         if (filepath is None) or (not os.path.isdir(filepath)):
-            raise ValueError(f'\033[1;31mFilepath [{filepath}] need to be a dir.\033[0m')
+            raise ValueError(f'Filepath [{filepath}] needs to be a dir.')

         self.logger.debug(f'Saving into [{filepath}]')
         basic_info = {
@@ -1489,7 +1490,7 @@ def _create_sparse_matrix(self, df_feat, source_field, target_field, form='coo',
             data = np.ones(len(df_feat))
         else:
             if value_field not in df_feat:
-                raise ValueError(f'\033[1;31mValue_field [{value_field}] should be one of `df_feat`\'s features.\033[0m')
+                raise ValueError(f'Value_field [{value_field}] should be one of `df_feat`\'s features.')
             data = df_feat[value_field]
         mat = coo_matrix((data, (src, tgt)), shape=(self.num(source_field), self.num(target_field)))

@@ -1498,7 +1499,7 @@
         elif form == 'csr':
             return mat.tocsr()
         else:
-            raise NotImplementedError(f'\033[1;31mSparse matrix format [{form}] has not been implemented.\033[0m')
+            raise NotImplementedError(f'Sparse matrix format [{form}] has not been implemented.')

     def _create_graph(self, tensor_feat, source_field, target_field, form='dgl', value_field=None):
         """Get graph that describe relations between two fields.
@@ -1545,7 +1546,7 @@ def _create_graph(self, tensor_feat, source_field, target_field, form='dgl', val
             graph = Data(edge_index=torch.stack([src, tgt]), edge_attr=edge_attr)
             return graph
         else:
-            raise NotImplementedError(f'\033[1;31mGraph format [{form}] has not been implemented.\033[0m')
+            raise NotImplementedError(f'Graph format [{form}] has not been implemented.')

     @dlapi.set()
     def inter_matrix(self, form='coo', value_field=None):
@@ -1565,7 +1566,7 @@ def inter_matrix(self, form='coo', value_field=None):
             scipy.sparse: Sparse matrix in form ``coo`` or ``csr``. 
""" if not self.uid_field or not self.iid_field: - raise ValueError('\033[1;31mdataset does not exist uid/iid, thus can not converted to sparse matrix.\033[0m') + raise ValueError('dataset does not exist uid/iid, thus can not converted to sparse matrix.') return self._create_sparse_matrix(self.inter_feat, self.uid_field, self.iid_field, form, value_field) def _history_matrix(self, row, value_field=None): @@ -1598,7 +1599,7 @@ def _history_matrix(self, row, value_field=None): values = np.ones(len(self.inter_feat)) else: if value_field not in self.inter_feat: - raise ValueError(f'\033[1;31mValue_field [{value_field}] should be one of `inter_feat`\'s features.\033[0m') + raise ValueError(f'Value_field [{value_field}] should be one of `inter_feat`\'s features.') values = self.inter_feat[value_field].numpy() if row == 'user': @@ -1692,7 +1693,7 @@ def get_preload_weight(self, field): numpy.ndarray: preloaded weight matrix. See :doc:`../user_guide/data/data_args` for details. """ if field not in self._preloaded_weight: - raise ValueError(f'\033[1;31mField [{field}] not in preload_weight\033[0m') + raise ValueError(f'Field [{field}] not in preload_weight') return self._preloaded_weight[field] def _dataframe_to_interaction(self, data): diff --git a/recbole/data/dataset/kg_dataset.py b/recbole/data/dataset/kg_dataset.py index 1a8870d26..dd193aaad 100644 --- a/recbole/data/dataset/kg_dataset.py +++ b/recbole/data/dataset/kg_dataset.py @@ -22,6 +22,7 @@ from recbole.data.dataset import Dataset from recbole.data.utils import dlapi from recbole.utils import FeatureSource, FeatureType +from recbole.utils.utils import set_color class KnowledgeBasedDataset(Dataset): @@ -80,8 +81,8 @@ def _get_field_from_config(self): self._check_field('head_entity_field', 'tail_entity_field', 'relation_field', 'entity_field') self.set_field_property(self.entity_field, FeatureType.TOKEN, FeatureSource.KG, 1) - self.logger.debug(f'\033[0;34mrelation_field\033[0m: {self.relation_field}') - self.logger.debug(f'\033[0;34mentity_field\033[0m: {self.entity_field}') + self.logger.debug(set_color('relation_field', 'blue') + ': {self.relation_field}') + self.logger.debug(set_color('entity_field', 'blue') + ': {self.entity_field}') def _data_processing(self): self._set_field2ent_level() @@ -138,10 +139,10 @@ def save(self, filepath): raise NotImplementedError() def _load_kg(self, token, dataset_path): - self.logger.debug(f'\033[0;32mLoading kg from [{dataset_path}].\033[0m') + self.logger.debug(set_color('Loading kg from [{}].', 'green').format(dataset_path)) kg_path = os.path.join(dataset_path, f'{token}.kg') if not os.path.isfile(kg_path): - raise ValueError(f'\033[1;31m[{token}.kg] not found in [{dataset_path}].\033[0m') + raise ValueError('[{token}.kg] not found in [{dataset_path}].') df = self._load_feat(kg_path, FeatureSource.KG) self._check_kg(df) return df @@ -153,10 +154,10 @@ def _check_kg(self, kg): assert self.relation_field in kg, kg_warn_message.format(self.relation_field) def _load_link(self, token, dataset_path): - self.logger.debug(f'\033[0;32mLoading link from [{dataset_path}].\033[0m') + self.logger.debug(set_color('Loading link from [{}].', 'green').format(dataset_path)) link_path = os.path.join(dataset_path, f'{token}.link') if not os.path.isfile(link_path): - raise ValueError(f'\033[1;31m[{token}.link] not found in [{dataset_path}].\033[0m') + raise ValueError(f'[{token}.link] not found in [{dataset_path}].') df = self._load_feat(link_path, 'link') self._check_link(df) @@ -207,7 +208,7 @@ def 
_get_ent_fields_in_same_space(self):
             if self._contain_ent_field(field_set):
                 field_set = self._remove_ent_field(field_set)
                 ent_fields.update(field_set)
-        self.logger.debug(f'\033[0;34ment_fields\033[0m: {fields_in_same_space}')
+        self.logger.debug(set_color('ent_fields', 'blue') + f': {fields_in_same_space}')
         return ent_fields

     def _remove_ent_field(self, field_set):
@@ -442,7 +443,7 @@ def kg_graph(self, form='coo', value_field=None):
         elif form in ['dgl', 'pyg']:
             return self._create_graph(*args)
         else:
-            raise NotImplementedError('\033[1;31mkg graph format [{}] has not been implemented.\033[0m')
+            raise NotImplementedError('kg graph format [{}] has not been implemented.'.format(form))

     def _create_ckg_sparse_matrix(self, form='coo', show_relation=False):
         user_num = self.user_num
@@ -473,7 +474,7 @@ def _create_ckg_sparse_matrix(self, form='coo', show_relation=False):
         elif form == 'csr':
             return mat.tocsr()
         else:
-            raise NotImplementedError(f'\033[1;31mSparse matrix format [{form}] has not been implemented.\033[0m')
+            raise NotImplementedError(f'Sparse matrix format [{form}] has not been implemented.')

     def _create_ckg_graph(self, form='dgl', show_relation=False):
         user_num = self.user_num
@@ -510,7 +511,7 @@ def _create_ckg_graph(self, form='dgl', show_relation=False):
             graph = Data(edge_index=torch.stack([src, tgt]), edge_attr=edge_attr)
             return graph
         else:
-            raise NotImplementedError(f'Graph format [{form}] has not been implemented.\033[0m')
+            raise NotImplementedError(f'Graph format [{form}] has not been implemented.')

     @dlapi.set()
     def ckg_graph(self, form='coo', value_field=None):
@@ -542,7 +543,7 @@ def ckg_graph(self, form='coo', value_field=None):
             https://github.com/rusty1s/pytorch_geometric
         """
         if value_field is not None and value_field != self.relation_field:
-            raise ValueError(f'\033[1;31mValue_field [{value_field}] can only be [{self.relation_field}] in ckg_graph.\033[0m')
+            raise ValueError(f'Value_field [{value_field}] can only be [{self.relation_field}] in ckg_graph.')
         show_relation = value_field is not None

         if form in ['coo', 'csr']:
@@ -550,4 +551,4 @@ def ckg_graph(self, form='coo', value_field=None):
         elif form in ['dgl', 'pyg']:
             return self._create_ckg_graph(form, show_relation)
         else:
-            raise NotImplementedError('\033[1;31mckg graph format [{}] has not been implemented.\033[0m')
+            raise NotImplementedError('ckg graph format [{}] has not been implemented.'.format(form))
diff --git a/recbole/data/dataset/sequential_dataset.py b/recbole/data/dataset/sequential_dataset.py
index 60aaa83f5..98e0d7df3 100644
--- a/recbole/data/dataset/sequential_dataset.py
+++ b/recbole/data/dataset/sequential_dataset.py
@@ -89,9 +89,9 @@ def prepare_data_augmentation(self):

     def leave_one_out(self, group_by, leave_one_num=1):
         self.logger.debug(f'Leave one out, group_by=[{group_by}], leave_one_num=[{leave_one_num}].')
         if group_by is None:
-            raise ValueError('\033[1;31mLeave one out strategy require a group field.\033[0m')
+            raise ValueError('Leave one out strategy requires a group field.')
         if group_by != self.uid_field:
-            raise ValueError('\033[1;31mSequential models require group by user.\033[0m')
+            raise ValueError('Sequential models require group by user.')

         self.prepare_data_augmentation()
         grouped_index = self._grouped_index(self.uid_list)
@@ -121,7 +121,7 @@ def inter_matrix(self, form='coo', value_field=None):
             scipy.sparse: Sparse matrix in form ``coo`` or ``csr``. 
""" if not self.uid_field or not self.iid_field: - raise ValueError('\033[1;31mdataset does not exist uid/iid, thus can not converted to sparse matrix.\033[0m') + raise ValueError('dataset does not exist uid/iid, thus can not converted to sparse matrix.') self.logger.warning('Load interaction matrix may lead to label leakage from testing phase, this implementation ' 'only provides the interactions corresponding to specific phase') @@ -133,12 +133,12 @@ def build(self, eval_setting): ordering_args = eval_setting.ordering_args if ordering_args['strategy'] == 'shuffle': - raise ValueError('\033[1;31mOrdering strategy `shuffle` is not supported in sequential models.\033[0m') + raise ValueError('Ordering strategy `shuffle` is not supported in sequential models.') elif ordering_args['strategy'] == 'by': if ordering_args['field'] != self.time_field: - raise ValueError('\033[1;31mSequential models require `TO` (time ordering) strategy.\033[0m') + raise ValueError('Sequential models require `TO` (time ordering) strategy.') if ordering_args['ascending'] is not True: - raise ValueError('\033[1;31mSequential models require `time_field` to sort in ascending order.\033[0m') + raise ValueError('Sequential models require `time_field` to sort in ascending order.') group_field = eval_setting.group_field @@ -146,4 +146,4 @@ def build(self, eval_setting): if split_args['strategy'] == 'loo': return self.leave_one_out(group_by=group_field, leave_one_num=split_args['leave_one_num']) else: - ValueError('\033[1;31mSequential models require `loo` (leave one out) split strategy.\033[0m') + ValueError('Sequential models require `loo` (leave one out) split strategy.') diff --git a/recbole/data/dataset/social_dataset.py b/recbole/data/dataset/social_dataset.py index 09cb960e1..66d1c050f 100644 --- a/recbole/data/dataset/social_dataset.py +++ b/recbole/data/dataset/social_dataset.py @@ -45,8 +45,8 @@ def _get_field_from_config(self): self.target_field = self.config['TARGET_ID_FIELD'] self._check_field('source_field', 'target_field') - self.logger.debug(f'\033[0;34msource_id_field\033[0m: {self.source_field}') - self.logger.debug(f'\033[0;34mtarget_id_field\033[0m: {self.target_field}') + self.logger.debug(set_color('source_id_field', 'blue') + ': {self.source_field}') + self.logger.debug(set_color('target_id_field', 'blue') + ': {self.target_field}') def _load_data(self, token, dataset_path): """Load ``.net`` additionally. @@ -65,10 +65,10 @@ def _load_net(self, dataset_name, dataset_path): if os.path.isfile(net_file_path): net_feat = self._load_feat(net_file_path, FeatureSource.NET) if net_feat is None: - raise ValueError('\033[1;31m.net file exist, but net_feat is None, please check your load_col\033[0m') + raise ValueError('.net file exist, but net_feat is None, please check your load_col') return net_feat else: - raise ValueError(f'\033[1;31mFile {net_file_path} not exist.\033[0m') + raise ValueError(f'File {net_file_path} not exist.') def _get_fields_in_same_space(self): """Parsing ``config['fields_in_same_space']``. See :doc:`../user_guide/data/data_args` for detail arg setting. 
@@ -120,7 +120,7 @@ def net_graph(self, form='coo', value_field=None):
         elif form in ['dgl', 'pyg']:
             return self._create_graph(*args)
         else:
-            raise NotImplementedError('\033[1;31mnet graph format [{}] has not been implemented.\033[0m')
+            raise NotImplementedError('net graph format [{}] has not been implemented.'.format(form))

     def __str__(self):
         info = [super().__str__(), f'The number of connections of social network: {len(self.net_feat)}']
diff --git a/recbole/data/interaction.py b/recbole/data/interaction.py
index bd29f354d..218b8b035 100644
--- a/recbole/data/interaction.py
+++ b/recbole/data/interaction.py
@@ -86,7 +86,7 @@ def __init__(self, interaction, pos_len_list=None, user_len_list=None):
         self.set_additional_info(pos_len_list, user_len_list)
         for k in self.interaction:
             if not isinstance(self.interaction[k], torch.Tensor):
-                raise ValueError(f'\033[1;31mInteraction [{interaction}] should only contains torch.Tensor\033[0m')
+                raise ValueError(f'Interaction [{interaction}] should only contain torch.Tensor')
         self.length = -1
         for k in self.interaction:
             self.length = max(self.length, self.interaction[k].shape[0])
@@ -95,7 +95,7 @@ def set_additional_info(self, pos_len_list=None, user_len_list=None):
         self.pos_len_list = pos_len_list
         self.user_len_list = user_len_list
         if (self.pos_len_list is None) ^ (self.user_len_list is None):
-            raise ValueError('\033[1;31mpos_len_list and user_len_list should be both None or valued.\033[0m')
+            raise ValueError('pos_len_list and user_len_list should be both None or valued.')

     def __iter__(self):
         return self.interaction.__iter__()
@@ -253,7 +253,7 @@ def drop(self, column):
             column (str): the column to be dropped.
         """
         if column not in self.interaction:
-            raise ValueError(f'\033[1;31mColumn [{column}] is not in [{self}].\033[0m')
+            raise ValueError(f'Column [{column}] is not in [{self}].')
         del self.interaction[column]

     def _reindex(self, index):
@@ -285,29 +285,29 @@ def sort(self, by, ascending=True):
         """
         if isinstance(by, str):
             if by not in self.interaction:
-                raise ValueError(f'\033[1;31m[{by}] is not exist in interaction [{self}].\033[0m')
+                raise ValueError(f'[{by}] does not exist in interaction [{self}].')
             by = [by]
         elif isinstance(by, (list, tuple)):
             for b in by:
                 if b not in self.interaction:
-                    raise ValueError(f'\033[1;31m[{b}] is not exist in interaction [{self}].\033[0m')
+                    raise ValueError(f'[{b}] does not exist in interaction [{self}].')
         else:
-            raise TypeError(f'\033[1;31mWrong type of by [{by}].\033[0m')
+            raise TypeError(f'Wrong type of by [{by}].')

         if isinstance(ascending, bool):
             ascending = [ascending]
         elif isinstance(ascending, (list, tuple)):
             for a in ascending:
                 if not isinstance(a, bool):
-                    raise TypeError(f'\033[1;31mWrong type of ascending [{ascending}].\033[0m')
+                    raise TypeError(f'Wrong type of ascending [{ascending}].')
         else:
-            raise TypeError(f'\033[1;31mWrong type of ascending [{ascending}].\033[0m')
+            raise TypeError(f'Wrong type of ascending [{ascending}].')

         if len(by) != len(ascending):
             if len(ascending) == 1:
                 ascending = ascending * len(by)
             else:
-                raise ValueError(f'\033[1;31mby [{by}] and ascending [{ascending}] should have same length.\033[0m')
+                raise ValueError(f'by [{by}] and ascending [{ascending}] should have the same length.')

         for b, a in zip(by[::-1], ascending[::-1]):
             index = np.argsort(self.interaction[b], kind='stable')
@@ -334,14 +334,14 @@ def cat_interactions(interactions):
         :class:`Interaction`: Concatenated interaction. 
""" if not isinstance(interactions, (list, tuple)): - raise TypeError(f'\033[1;31mInteractions [{interactions}] should be list or tuple.\033[0m') + raise TypeError(f'Interactions [{interactions}] should be list or tuple.') if len(interactions) == 0: - raise ValueError(f'\033[1;31mInteractions [{interactions}] should have some interactions.\033[0m') + raise ValueError(f'Interactions [{interactions}] should have some interactions.') columns_set = set(interactions[0].columns) for inter in interactions: if columns_set != set(inter.columns): - raise ValueError(f'\033[1;31mInteractions [{interactions}] should have some interactions.\033[0m') + raise ValueError(f'Interactions [{interactions}] should have some interactions.') new_inter = {col: torch.cat([inter[col] for inter in interactions]) for col in columns_set} return Interaction(new_inter) diff --git a/recbole/data/utils.py b/recbole/data/utils.py index f7b3e965f..717fc4bd8 100644 --- a/recbole/data/utils.py +++ b/recbole/data/utils.py @@ -20,6 +20,7 @@ from recbole.data.dataloader import * from recbole.sampler import KGSampler, Sampler, RepeatableSampler from recbole.utils import ModelType, ensure_dir +from recbole.utils.utils import set_color def create_dataset(config): @@ -91,8 +92,8 @@ def data_preparation(config, dataset, save=False): if train_neg_sample_args['strategy'] != 'none': if dataset.label_field in dataset.inter_feat: raise ValueError( - f'\033[1;31m`training_neg_sample_num` should be 0 \033[0m' - f'\033[1;31mif inter_feat have label_field [{dataset.label_field}].\033[0m' + f'`training_neg_sample_num` should be 0 ' + f'if inter_feat have label_field [{dataset.label_field}].' ) if model_type != ModelType.SEQUENTIAL: sampler = Sampler(phases, built_datasets, train_neg_sample_args['distribution']) @@ -105,12 +106,16 @@ def data_preparation(config, dataset, save=False): train_kwargs['kg_sampler'] = kg_sampler dataloader = get_data_loader('train', config, train_neg_sample_args) - logger.info(f'\033[1;35mBuild\033[0m \033[1;33m[{dataloader.__name__}]\033[0m for \033[1;33m[train]\033[0m with format \033[1;33m[{train_kwargs["dl_format"]}]\033[0m') + logger.info((set_color('Build', 'pink') + set_color(' [{}]', 'yellow') + + ' for ' + set_color('[train]', 'yellow') + ' with format ' + + set_color('[{}]', 'yellow')).format(dataloader.__name__, train_kwargs["dl_format"])) if train_neg_sample_args['strategy'] != 'none': - logger.info(f'\033[1;35m[train]\033[0m \033[1;34mNegative Sampling\033[0m: {train_neg_sample_args}') + logger.info(set_color('[train]', 'pink') + set_color(' Negative Sampling', 'blue') + ': {train_neg_sample_args}') else: - logger.info(f'\033[1;33m[train] No Negative Sampling\033[0m') - logger.info(f'\033[1;35m[train]\033[0m \033[1;36mbatch_size\033[0m = \033[0;33m[{train_kwargs["batch_size"]}]\033[0m, \033[1;36mshuffle\033[0m = \033[0;33m[{train_kwargs["shuffle"]}]\033[0m\n') + logger.info(set_color('[train]', 'pink') + set_color(' No Negative Sampling', 'yellow')) + logger.info(set_color('[train]', 'pink') + set_color(' batch_size', 'cyan') + ' = ' + + set_color('[{train_kwargs["batch_size"]}]', 'yellow') + ', ' + + set_color('shuffle', 'cyan') + ' = ' + set_color('[{train_kwargs["shuffle"]}]\n', 'yellow')) train_data = dataloader(**train_kwargs) # Evaluation @@ -125,8 +130,8 @@ def data_preparation(config, dataset, save=False): if eval_neg_sample_args['strategy'] != 'none': if dataset.label_field in dataset.inter_feat: raise ValueError( - f'\033[1;31mIt can not validate with `{es.es_str[1]}` \033[0m' - f'\033[1;31mwhen 
inter_feat have label_field [{dataset.label_field}].\033[0m' + f'It can not validate with `{es.es_str[1]}` ' + f'when inter_feat have label_field [{dataset.label_field}].' ) if sampler is None: if model_type != ModelType.SEQUENTIAL: @@ -142,9 +147,13 @@ def data_preparation(config, dataset, save=False): test_kwargs.update(eval_kwargs) dataloader = get_data_loader('evaluation', config, eval_neg_sample_args) - logger.info(f'\033[1;35mBuild\033[0m \033[1;33m[{dataloader.__name__}]\033[0m for \033[1;33m[evaluation]\033[0m with format \033[1;33m[{eval_kwargs["dl_format"]}]\033[0m') + logger.info((set_color('Build', 'pink') + set_color(' [{}]', 'yellow') + + ' for ' + set_color('[evaluation]', 'yellow') + ' with format ' + + set_color('[{}]', 'yellow')).format(dataloader.__name__, eval_kwargs["dl_format"])) logger.info(es) - logger.info(f'\033[1;35m[evaluation]\033[0m \033[1;36mbatch_size\033[0m = \033[1;33m[{eval_kwargs["batch_size"]}]\033[0m, \033[1;36mshuffle\033[0m = \033[1;33m[{eval_kwargs["shuffle"]}]\033[0m\n') + logger.info((set_color('[evaluation]', 'pink') + set_color(' batch_size', 'cyan') + ' = ' + + set_color('[{}]', 'yellow') + ', ' + set_color('shuffle', 'cyan') + ' = ' + + set_color('[{}]\n', 'yellow')).format(eval_kwargs["batch_size"], eval_kwargs["shuffle"])) valid_data = dataloader(**valid_kwargs) test_data = dataloader(**test_kwargs) @@ -163,7 +172,7 @@ def save_datasets(save_path, name, dataset): name = [name] dataset = [dataset] if len(name) != len(dataset): - raise ValueError(f'\033[1;31mLength of name {name} should equal to length of dataset {dataset}.\033[0m') + raise ValueError(f'Length of name {name} should equal to length of dataset {dataset}.') for i, d in enumerate(dataset): cur_path = os.path.join(save_path, name[i]) ensure_dir(cur_path) @@ -221,10 +230,10 @@ def get_data_loader(name, config, neg_sample_args): return GeneralFullDataLoader elif neg_sample_strategy == 'none': raise NotImplementedError( - '\033[1;31mThe use of external negative sampling for knowledge model has not been implemented\033[0m' + 'The use of external negative sampling for knowledge model has not been implemented' ) else: - raise NotImplementedError(f'\033[1;31mModel_type [{model_type}] has not been implemented.\033[0m') + raise NotImplementedError(f'Model_type [{model_type}] has not been implemented.') def _get_DIN_data_loader(name, config, neg_sample_args): diff --git a/recbole/evaluator/abstract_evaluator.py b/recbole/evaluator/abstract_evaluator.py index 88f4ad2e3..45254c9a6 100644 --- a/recbole/evaluator/abstract_evaluator.py +++ b/recbole/evaluator/abstract_evaluator.py @@ -114,7 +114,7 @@ def full_sort_collect(self, true_scores, pred_scores): """it is called when evaluation sample distribution is `full`. 
""" - raise NotImplementedError('\033[1;31mfull sort can\'t use IndividualEvaluator\033[0m') + raise NotImplementedError('full sort can\'t use IndividualEvaluator') def get_score_matrix(self, true_scores, pred_scores): """get score matrix @@ -133,4 +133,4 @@ def get_score_matrix(self, true_scores, pred_scores): def _check_args(self): if self.full: - raise NotImplementedError('\033[1;31mfull sort can\'t use IndividualEvaluator\033[0m') + raise NotImplementedError('full sort can\'t use IndividualEvaluator') diff --git a/recbole/evaluator/evaluators.py b/recbole/evaluator/evaluators.py index 56f7f1ec1..8b0d97238 100644 --- a/recbole/evaluator/evaluators.py +++ b/recbole/evaluator/evaluators.py @@ -113,11 +113,11 @@ def _check_args(self): for topk in self.topk: if topk <= 0: raise ValueError( - '\033[1;31mtopk must be a positive integer or a list of positive integers, \033[0m' - '\033[1;31mbut get `{}`\033[0m'.format(topk) + 'topk must be a positive integer or a list of positive integers, ' + 'but get `{}`'.format(topk) ) else: - raise TypeError('\033[1;31mThe topk must be a integer, list\033[0m') + raise TypeError('The topk must be a integer, list') def _calculate_metrics(self, pos_len_list, topk_idx, shapes): """integrate the results of each batch and evaluate the topk metrics by users diff --git a/recbole/evaluator/proxy_evaluator.py b/recbole/evaluator/proxy_evaluator.py index fa07f4d89..a0df5246e 100644 --- a/recbole/evaluator/proxy_evaluator.py +++ b/recbole/evaluator/proxy_evaluator.py @@ -101,9 +101,9 @@ def _check_args(self): if isinstance(self.metrics, str): self.metrics = [self.metrics] else: - raise TypeError('\033[1;31mmetrics must be str or list\033[0m') + raise TypeError('metrics must be str or list') # Convert metric to lowercase for m in self.metrics: if m.lower() not in self.valid_metrics: - raise ValueError("\033[1;31mThere is no metric named {}!\033[0m".format(m)) + raise ValueError("There is no metric named {}!".format(m)) diff --git a/recbole/evaluator/utils.py b/recbole/evaluator/utils.py index 548f36ac7..392873493 100644 --- a/recbole/evaluator/utils.py +++ b/recbole/evaluator/utils.py @@ -66,7 +66,7 @@ def trunc(scores, method): try: cut_method = getattr(np, method) except NotImplementedError: - raise NotImplementedError("\033[1;31mmodule 'numpy' has no function named '{}'\033[0m".format(method)) + raise NotImplementedError("module 'numpy' has no function named '{}'".format(method)) scores = cut_method(scores) return scores diff --git a/recbole/model/abstract_recommender.py b/recbole/model/abstract_recommender.py index 026573055..b677cf909 100644 --- a/recbole/model/abstract_recommender.py +++ b/recbole/model/abstract_recommender.py @@ -20,6 +20,7 @@ from recbole.model.layers import FMEmbedding, FMFirstOrderLinear from recbole.utils import ModelType, InputType, FeatureSource, FeatureType +from recbole.utils.utils import set_color class AbstractRecommender(nn.Module): @@ -71,7 +72,7 @@ def __str__(self): """ model_parameters = filter(lambda p: p.requires_grad, self.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) - return super().__str__() + '\n\033[1;34mTrainable parameters\033[0m: {}'.format(params) + return super().__str__() + set_color('\nTrainable parameters', 'blue') + ': {}'.format(params) class GeneralRecommender(AbstractRecommender): @@ -326,7 +327,7 @@ def double_tower_embed_input_fields(self, interaction): """ if not self.double_tower: - raise RuntimeError('\033[1;31mPlease check your model hyper parameters and set \'double tower\' as 
True\033[0m') + raise RuntimeError('Please check your model hyper parameters and set \'double tower\' as True') sparse_embedding, dense_embedding = self.embed_input_fields(interaction) if dense_embedding is not None: first_dense_embedding, second_dense_embedding = \ diff --git a/recbole/model/general_recommender/cdae.py b/recbole/model/general_recommender/cdae.py index b67d43f0b..627b01abd 100644 --- a/recbole/model/general_recommender/cdae.py +++ b/recbole/model/general_recommender/cdae.py @@ -50,14 +50,14 @@ def __init__(self, config, dataset): elif self.hid_activation == 'tanh': self.h_act = nn.Tanh() else: - raise ValueError('\033[1;31mInvalid hidden layer activation function\033[0m') + raise ValueError('Invalid hidden layer activation function') if self.out_activation == 'sigmoid': self.o_act = nn.Sigmoid() elif self.out_activation == 'relu': self.o_act = nn.ReLU() else: - raise ValueError('\033[1;31mInvalid output layer activation function\033[0m') + raise ValueError('Invalid output layer activation function') self.dropout = nn.Dropout(p=self.corruption_ratio) @@ -104,7 +104,7 @@ def calculate_loss(self, interaction): elif self.loss_type == 'BCE': loss_func = nn.BCELoss(reduction='sum') else: - raise ValueError('\033[1;31mInvalid loss_type, loss_type must in [MSE, BCE]\033[0m') + raise ValueError('Invalid loss_type, loss_type must in [MSE, BCE]') loss = loss_func(predict, x_items) # l1-regularization diff --git a/recbole/model/general_recommender/dmf.py b/recbole/model/general_recommender/dmf.py index 4c7db632d..7c0fbe57d 100644 --- a/recbole/model/general_recommender/dmf.py +++ b/recbole/model/general_recommender/dmf.py @@ -65,7 +65,7 @@ def __init__(self, config, dataset): self.history_item_id, self.history_item_value, _ = dataset.history_item_matrix(value_field=self.RATING) self.interaction_matrix = dataset.inter_matrix(form='csr', value_field=self.RATING).astype(np.float32) else: - raise ValueError("\033[1;31mThe inter_matrix_type must in ['01', 'rating'] but get {}\033[0m".format(self.inter_matrix_type)) + raise ValueError("The inter_matrix_type must in ['01', 'rating'] but get {}".format(self.inter_matrix_type)) self.max_rating = self.history_user_value.max() # tensor of shape [n_items, H] where H is max length of history interaction. 
self.history_user_id = self.history_user_id.to(self.device) diff --git a/recbole/model/general_recommender/nais.py b/recbole/model/general_recommender/nais.py index c7e669b8e..c59cf594b 100644 --- a/recbole/model/general_recommender/nais.py +++ b/recbole/model/general_recommender/nais.py @@ -78,7 +78,7 @@ def __init__(self, config, dataset): elif self.algorithm == 'prod': self.mlp_layers = MLPLayers([self.embedding_size, self.weight_size]) else: - raise ValueError("\033[1;31mNAIS just support attention type in ['concat', 'prod'] but get {}\033[0m".format(self.algorithm)) + raise ValueError("NAIS just support attention type in ['concat', 'prod'] but get {}".format(self.algorithm)) self.weight_layer = nn.Parameter(torch.ones(self.weight_size, 1)) self.bceloss = nn.BCELoss() diff --git a/recbole/model/general_recommender/neumf.py b/recbole/model/general_recommender/neumf.py index a23673bdb..33d11d46f 100644 --- a/recbole/model/general_recommender/neumf.py +++ b/recbole/model/general_recommender/neumf.py @@ -116,7 +116,7 @@ def forward(self, user, item): elif self.mlp_train: output = self.sigmoid(self.predict_layer(mlp_output)) else: - raise RuntimeError('\033[1;31mmf_train and mlp_train can not be False at the same time\033[0m') + raise RuntimeError('mf_train and mlp_train can not be False at the same time') return output.squeeze() def calculate_loss(self, interaction): diff --git a/recbole/model/general_recommender/nncf.py b/recbole/model/general_recommender/nncf.py index e64af1dce..79692eada 100644 --- a/recbole/model/general_recommender/nncf.py +++ b/recbole/model/general_recommender/nncf.py @@ -80,8 +80,8 @@ def __init__(self, config, dataset): elif self.neigh_info_method == "louvain": self.u_neigh, self.i_neigh = self.get_neigh_louvain() else: - raise RuntimeError('\033[1;31mYou need to choose the right algorithm of processing neighborhood information. \ - The parameter neigh_info_method can be set to random, knn or louvain.\033[0m') + raise RuntimeError('You need to choose the right algorithm of processing neighborhood information. 
\ + The parameter neigh_info_method can be set to random, knn or louvain.') # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/layers.py b/recbole/model/layers.py index c984de9e0..3edb31568 100644 --- a/recbole/model/layers.py +++ b/recbole/model/layers.py @@ -113,7 +113,7 @@ def activation_layer(activation_name='relu', emb_dim=None): elif issubclass(activation_name, nn.Module): activation = activation_name() else: - raise NotImplementedError("\033[1;31mactivation function {} is not implemented\033[0m".format(activation_name)) + raise NotImplementedError("activation function {} is not implemented".format(activation_name)) return activation @@ -352,8 +352,8 @@ def __init__(self, n_heads, hidden_size, hidden_dropout_prob, attn_dropout_prob, super(MultiHeadAttention, self).__init__() if hidden_size % n_heads != 0: raise ValueError( - "\033[1;31mThe hidden size (%d) is not a multiple of the number of attention \033[0m" - "\033[1;31mheads (%d)\033[0m" % (hidden_size, n_heads) + "The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (hidden_size, n_heads) ) self.num_attention_heads = n_heads @@ -795,7 +795,7 @@ def __init__(self, dataset, embedding_size, pooling_mode, device): try: assert self.pooling_mode in ['mean', 'max', 'sum'] except AssertionError: - raise AssertionError("\033[1;31mMake sure 'pooling_mode' in ['mean', 'max', 'sum']!\033[0m") + raise AssertionError("Make sure 'pooling_mode' in ['mean', 'max', 'sum']!") self.get_fields_name_dim() self.get_embedding() @@ -820,7 +820,7 @@ def __init__(self, dataset, embedding_size, selected_features, pooling_mode, dev try: assert self.pooling_mode in ['mean', 'max', 'sum'] except AssertionError: - raise AssertionError("\033[1;31mMake sure 'pooling_mode' in ['mean', 'max', 'sum']!\033[0m") + raise AssertionError("Make sure 'pooling_mode' in ['mean', 'max', 'sum']!") self.get_fields_name_dim() self.get_embedding() @@ -866,7 +866,7 @@ def __init__(self, channels, kernels, strides, activation='relu', init_method=No self.num_of_nets = len(self.channels) - 1 if len(kernels) != len(strides) or self.num_of_nets != (len(kernels)): - raise RuntimeError('\033[1;31mchannels, kernels and strides don\'t match\n\033[0m') + raise RuntimeError('channels, kernels and strides don\'t match\n') cnn_modules = [] diff --git a/recbole/model/sequential_recommender/bert4rec.py b/recbole/model/sequential_recommender/bert4rec.py index 6d52588ac..1a923b7db 100644 --- a/recbole/model/sequential_recommender/bert4rec.py +++ b/recbole/model/sequential_recommender/bert4rec.py @@ -70,7 +70,7 @@ def __init__(self, config, dataset): try: assert self.loss_type in ['BPR', 'CE'] except AssertionError: - raise AssertionError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise AssertionError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(self._init_weights) @@ -232,7 +232,7 @@ def calculate_loss(self, interaction): / torch.sum(targets) return loss else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") def predict(self, interaction): item_seq = interaction[self.ITEM_SEQ] diff --git a/recbole/model/sequential_recommender/caser.py b/recbole/model/sequential_recommender/caser.py index 4d5117475..229816deb 100644 --- a/recbole/model/sequential_recommender/caser.py +++ b/recbole/model/sequential_recommender/caser.py @@ -83,7 +83,7 @@ def __init__(self, config, 
dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/fdsa.py b/recbole/model/sequential_recommender/fdsa.py index a4d21e51a..0fba79689 100644 --- a/recbole/model/sequential_recommender/fdsa.py +++ b/recbole/model/sequential_recommender/fdsa.py @@ -89,7 +89,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/fossil.py b/recbole/model/sequential_recommender/fossil.py index e4fa67458..0432174bf 100644 --- a/recbole/model/sequential_recommender/fossil.py +++ b/recbole/model/sequential_recommender/fossil.py @@ -54,7 +54,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # init the parameters of the model self.apply(self.init_weights) diff --git a/recbole/model/sequential_recommender/gcsan.py b/recbole/model/sequential_recommender/gcsan.py index ac77f9be7..e64381473 100644 --- a/recbole/model/sequential_recommender/gcsan.py +++ b/recbole/model/sequential_recommender/gcsan.py @@ -139,7 +139,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/gru4rec.py b/recbole/model/sequential_recommender/gru4rec.py index 1a6b9d23a..0ea93e331 100644 --- a/recbole/model/sequential_recommender/gru4rec.py +++ b/recbole/model/sequential_recommender/gru4rec.py @@ -61,7 +61,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/gru4recf.py b/recbole/model/sequential_recommender/gru4recf.py index 5585cf8d3..55f8d2360 100644 --- a/recbole/model/sequential_recommender/gru4recf.py +++ b/recbole/model/sequential_recommender/gru4recf.py @@ -81,7 +81,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(xavier_normal_initialization) diff --git a/recbole/model/sequential_recommender/gru4reckg.py b/recbole/model/sequential_recommender/gru4reckg.py index 66dfae29e..e8e68d5d8 100644 --- a/recbole/model/sequential_recommender/gru4reckg.py +++ b/recbole/model/sequential_recommender/gru4reckg.py 
@@ -64,7 +64,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(xavier_normal_initialization) diff --git a/recbole/model/sequential_recommender/hgn.py b/recbole/model/sequential_recommender/hgn.py index 13c70d071..dc5f6ec49 100644 --- a/recbole/model/sequential_recommender/hgn.py +++ b/recbole/model/sequential_recommender/hgn.py @@ -41,7 +41,7 @@ def __init__(self, config, dataset): self.pool_type = config["pooling_type"] if self.pool_type not in ["max", "average"]: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['max', 'average']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['max', 'average']!") # define the layers and loss function self.item_embedding = nn.Embedding(self.n_items, self.embedding_size, padding_idx=0) @@ -67,7 +67,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # init the parameters of the model self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/hrm.py b/recbole/model/sequential_recommender/hrm.py index f65de9613..421835266 100644 --- a/recbole/model/sequential_recommender/hrm.py +++ b/recbole/model/sequential_recommender/hrm.py @@ -59,7 +59,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # init the parameters of the model self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/ksr.py b/recbole/model/sequential_recommender/ksr.py index 1640edd17..bf48c6719 100644 --- a/recbole/model/sequential_recommender/ksr.py +++ b/recbole/model/sequential_recommender/ksr.py @@ -71,7 +71,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/narm.py b/recbole/model/sequential_recommender/narm.py index 44f7be530..76f5594b7 100644 --- a/recbole/model/sequential_recommender/narm.py +++ b/recbole/model/sequential_recommender/narm.py @@ -59,7 +59,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/nextitnet.py b/recbole/model/sequential_recommender/nextitnet.py index eb1179351..e0ad3def4 100644 --- a/recbole/model/sequential_recommender/nextitnet.py +++ b/recbole/model/sequential_recommender/nextitnet.py @@ -69,7 +69,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise 
NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") self.reg_loss = RegLoss() # parameters initialization diff --git a/recbole/model/sequential_recommender/npe.py b/recbole/model/sequential_recommender/npe.py index d9cd4bd78..29674241a 100644 --- a/recbole/model/sequential_recommender/npe.py +++ b/recbole/model/sequential_recommender/npe.py @@ -55,7 +55,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # init the parameters of the module self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/s3rec.py b/recbole/model/sequential_recommender/s3rec.py index 9334fd5c1..1aa86184f 100644 --- a/recbole/model/sequential_recommender/s3rec.py +++ b/recbole/model/sequential_recommender/s3rec.py @@ -103,7 +103,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE' and self.train_stage == 'finetune': self.loss_fct = nn.CrossEntropyLoss() elif self.train_stage == 'finetune': - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization assert self.train_stage in ['pretrain', 'finetune'] diff --git a/recbole/model/sequential_recommender/sasrec.py b/recbole/model/sequential_recommender/sasrec.py index 920b90b47..ea58a8fdf 100644 --- a/recbole/model/sequential_recommender/sasrec.py +++ b/recbole/model/sequential_recommender/sasrec.py @@ -71,7 +71,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/sasrecf.py b/recbole/model/sequential_recommender/sasrecf.py index bac276f5e..b6c4feb4f 100644 --- a/recbole/model/sequential_recommender/sasrecf.py +++ b/recbole/model/sequential_recommender/sasrecf.py @@ -70,7 +70,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self.apply(self._init_weights) diff --git a/recbole/model/sequential_recommender/shan.py b/recbole/model/sequential_recommender/shan.py index 7599d6773..19d7d435d 100644 --- a/recbole/model/sequential_recommender/shan.py +++ b/recbole/model/sequential_recommender/shan.py @@ -74,7 +74,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # init the parameter of the model self.apply(self.init_weights) diff --git a/recbole/model/sequential_recommender/srgnn.py b/recbole/model/sequential_recommender/srgnn.py index 9f275969f..0147f1499 100644 --- a/recbole/model/sequential_recommender/srgnn.py +++ b/recbole/model/sequential_recommender/srgnn.py @@ -135,7 +135,7 @@ def __init__(self, config, 
dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # parameters initialization self._reset_parameters() diff --git a/recbole/model/sequential_recommender/stamp.py b/recbole/model/sequential_recommender/stamp.py index b63e568ae..f9982734e 100644 --- a/recbole/model/sequential_recommender/stamp.py +++ b/recbole/model/sequential_recommender/stamp.py @@ -60,7 +60,7 @@ def __init__(self, config, dataset): elif self.loss_type == 'CE': self.loss_fct = nn.CrossEntropyLoss() else: - raise NotImplementedError("\033[1;31mMake sure 'loss_type' in ['BPR', 'CE']!\033[0m") + raise NotImplementedError("Make sure 'loss_type' in ['BPR', 'CE']!") # # parameters initialization self.apply(self._init_weights) diff --git a/recbole/quick_start/quick_start.py b/recbole/quick_start/quick_start.py index 7062e584f..aecaf61e0 100644 --- a/recbole/quick_start/quick_start.py +++ b/recbole/quick_start/quick_start.py @@ -12,6 +12,7 @@ from recbole.config import Config from recbole.data import create_dataset, data_preparation from recbole.utils import init_logger, get_model, get_trainer, init_seed +from recbole.utils.utils import set_color def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True): @@ -57,8 +58,8 @@ def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=Non # model evaluation test_result = trainer.evaluate(test_data, load_best_model=saved, show_progress=config['show_progress']) - logger.info('\033[1;33mbest valid \033[0m: {}'.format(best_valid_result)) - logger.info('\033[1;33mtest result\033[0m: {}'.format(test_result)) + logger.info(set_color('best valid ', 'yellow') + ': {}'.format(best_valid_result)) + logger.info(set_color('test result', 'yellow') + ': {}'.format(test_result)) return { 'best_valid_score': best_valid_score, diff --git a/recbole/sampler/sampler.py b/recbole/sampler/sampler.py index 58b30c100..e0e1a0d9b 100644 --- a/recbole/sampler/sampler.py +++ b/recbole/sampler/sampler.py @@ -61,14 +61,14 @@ def get_random_list(self): Returns: numpy.ndarray or list: Random list of value_id. """ - raise NotImplementedError('\033[1;31mmethod [get_random_list] should be implemented\033[0m') + raise NotImplementedError('method [get_random_list] should be implemented') def get_used_ids(self): """ Returns: numpy.ndarray: Used ids. Index is key_id, and element is a set of value_ids. 
""" - raise NotImplementedError('\033[1;31mmethod [get_used_ids] should be implemented\033[0m') + raise NotImplementedError('method [get_used_ids] should be implemented') def random(self): """ @@ -175,7 +175,7 @@ def __init__(self, phases, datasets, distribution='uniform'): if not isinstance(datasets, list): datasets = [datasets] if len(phases) != len(datasets): - raise ValueError(f'\033[1;31mPhases {phases} and datasets {datasets} should have the same length.\033[0m') + raise ValueError(f'Phases {phases} and datasets {datasets} should have the same length.') self.phases = phases self.datasets = datasets @@ -201,7 +201,7 @@ def get_random_list(self): random_item_list.extend(dataset.inter_feat[self.iid_field].numpy()) return random_item_list else: - raise NotImplementedError(f'\033[1;31mDistribution [{self.distribution}] has not been implemented.\033[0m') + raise NotImplementedError(f'Distribution [{self.distribution}] has not been implemented.') def get_used_ids(self): """ @@ -220,9 +220,9 @@ def get_used_ids(self): for used_item_set in used_item_id[self.phases[-1]]: if len(used_item_set) + 1 == self.n_items: # [pad] is a item. raise ValueError( - '\033[1;31mSome users have interacted with all items, \033[0m' - '\033[1;31mwhich we can not sample negative items for them. \033[0m' - '\033[1;31mPlease set `max_user_inter_num` to filter those users.\033[0m' + 'Some users have interacted with all items, ' + 'which we can not sample negative items for them. ' + 'Please set `max_user_inter_num` to filter those users.' ) return used_item_id @@ -237,7 +237,7 @@ def set_phase(self, phase): is set to the value of corresponding phase. """ if phase not in self.phases: - raise ValueError(f'\033[1;31mPhase [{phase}] not exist.\033[0m') + raise ValueError(f'Phase [{phase}] not exist.') new_sampler = copy.copy(self) new_sampler.phase = phase new_sampler.used_ids = new_sampler.used_ids[phase] @@ -262,7 +262,7 @@ def sample_by_user_ids(self, user_ids, num): except IndexError: for user_id in user_ids: if user_id < 0 or user_id >= self.n_users: - raise ValueError(f'\033[1;31muser_id [{user_id}] not exist.\033[0m') + raise ValueError(f'user_id [{user_id}] not exist.') class KGSampler(AbstractSampler): @@ -296,7 +296,7 @@ def get_random_list(self): elif self.distribution == 'popularity': return list(self.hid_list) + list(self.tid_list) else: - raise NotImplementedError(f'\033[1;31mDistribution [{self.distribution}] has not been implemented.\033[0m') + raise NotImplementedError(f'Distribution [{self.distribution}] has not been implemented.') def get_used_ids(self): """ @@ -311,8 +311,8 @@ def get_used_ids(self): for used_tail_set in used_tail_entity_id: if len(used_tail_set) + 1 == self.entity_num: # [pad] is a entity. raise ValueError( - '\033[1;31mSome head entities have relation with all entities, \033[0m' - '\033[1;31mwhich we can not sample negative entities for them.\033[0m' + 'Some head entities have relation with all entities, ' + 'which we can not sample negative entities for them.' 
                )
         return used_tail_entity_id

@@ -335,7 +335,7 @@ def sample_by_entity_ids(self, head_entity_ids, num=1):
         except IndexError:
             for head_entity_id in head_entity_ids:
                 if head_entity_id not in self.head_entities:
-                    raise ValueError(f'\033[1;31mhead_entity_id [{head_entity_id}] not exist.\033[0m')
+                    raise ValueError(f'head_entity_id [{head_entity_id}] does not exist.')


 class RepeatableSampler(AbstractSampler):
@@ -373,7 +373,7 @@ def get_random_list(self):
         elif self.distribution == 'popularity':
             return self.dataset.inter_feat[self.iid_field].numpy()
         else:
-            raise NotImplementedError(f'\033[1;31mDistribution [{self.distribution}] has not been implemented.\033[0m')
+            raise NotImplementedError(f'Distribution [{self.distribution}] has not been implemented.')

     def get_used_ids(self):
         """
@@ -402,7 +402,7 @@ def sample_by_user_ids(self, user_ids, num):
         except IndexError:
             for user_id in user_ids:
                 if user_id < 0 or user_id >= self.n_users:
-                    raise ValueError(f'\033[1;31muser_id [{user_id}] not exist.\033[0m')
+                    raise ValueError(f'user_id [{user_id}] does not exist.')

     def set_phase(self, phase):
         """Get the sampler of corresponding phase.
@@ -414,7 +414,7 @@
             Sampler: the copy of this sampler, and :attr:`phase` is set the same as input phase.
         """
         if phase not in self.phases:
-            raise ValueError(f'\033[1;31mPhase [{phase}] not exist.\033[0m')
+            raise ValueError(f'Phase [{phase}] does not exist.')
         new_sampler = copy.copy(self)
         new_sampler.phase = phase
         return new_sampler
diff --git a/recbole/trainer/hyper_tuning.py b/recbole/trainer/hyper_tuning.py
index 4853037e9..dda0c49e5 100644
--- a/recbole/trainer/hyper_tuning.py
+++ b/recbole/trainer/hyper_tuning.py
@@ -76,8 +76,8 @@ def _validate_space_exhaustive_search(space):
         if node.name in implicit_stochastic_symbols:
             if node.name not in supported_stochastic_symbols:
                 raise ExhaustiveSearchError(
-                    '\033[1;31mExhaustive search is only possible with the following stochastic symbols: \033[0m'
-                    '\033[1;31m' + ', '.join(supported_stochastic_symbols)
+                    'Exhaustive search is only possible with the following stochastic symbols: '
+                    + ', '.join(supported_stochastic_symbols)
                 )


@@ -163,13 +163,13 @@ def __init__(
         elif params_dict:
             self.space = self._build_space_from_dict(params_dict)
         else:
-            raise ValueError('\033[1;31mat least one of `space`, `params_file` and `params_dict` is provided\033[0m')
+            raise ValueError('at least one of `space`, `params_file` and `params_dict` should be provided')
         if isinstance(algo, str):
             if algo == 'exhaustive':
                 self.algo = partial(exhaustive_search, nbMaxSucessiveFailures=1000)
                 self.max_evals = _spacesize(self.space)
             else:
-                raise ValueError('\033[1;31mIllegal algo [{}]\033[0m'.format(algo))
+                raise ValueError('Illegal algo [{}]'.format(algo))
         else:
             self.algo = algo
@@ -196,7 +196,7 @@ def _build_space_from_file(file):
                 low, high = para_value.strip().split(',')
                 space[para_name] = hp.loguniform(para_name, float(low), float(high))
             else:
-                raise ValueError('\033[1;31mIllegal param type [{}]\033[0m'.format(para_type))
+                raise ValueError('Illegal param type [{}]'.format(para_type))
         return space

     @staticmethod
@@ -228,7 +228,7 @@ def _build_space_from_dict(config_dict):
                     high = para_value[1]
                     space[para_name] = hp.loguniform(para_name, float(low), float(high))
                 else:
-                    raise ValueError('\033[1;31mIllegal param type [{}]\033[0m'.format(para_type))
+                    raise ValueError('Illegal param type [{}]'.format(para_type))
         return space

     @staticmethod
diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py
index f7da7e4ae..b16eb943c 100644
--- a/recbole/trainer/trainer.py
+++ b/recbole/trainer/trainer.py
@@ -26,6 +26,7 @@
 from recbole.evaluator import ProxyEvaluator
 from recbole.utils import ensure_dir, get_local_time, early_stopping, calculate_valid_score, dict2str, \
     DataLoaderType, KGDataLoaderState
+from recbole.utils.utils import set_color


 class AbstractTrainer(object):
@@ -42,14 +43,14 @@ def fit(self, train_data):
         r"""Train the model based on the train data.

         """
-        raise NotImplementedError('\033[1;31mMethod [next] should be implemented.\033[0m')
+        raise NotImplementedError('Method [fit] should be implemented.')

     def evaluate(self, eval_data):
         r"""Evaluate the model based on the eval data.

         """
-        raise NotImplementedError('\033[1;31mMethod [next] should be implemented.\033[0m')
+        raise NotImplementedError('Method [evaluate] should be implemented.')


 class Trainer(AbstractTrainer):
@@ -144,7 +145,7 @@ def _train_epoch(self, train_data, epoch_idx, loss_func=None, show_progress=Fals
             tqdm(
                 enumerate(train_data),
                 total=len(train_data),
-                desc=f"\033[1;35mTrain {epoch_idx:>5}\033[0m",
+                desc=set_color("Train {:>5}", 'pink').format(epoch_idx),
             ) if show_progress else enumerate(train_data)
         )
         for batch_idx, interaction in iter_data:
@@ -225,17 +226,17 @@ def resume_checkpoint(self, resume_file):

     def _check_nan(self, loss):
         if torch.isnan(loss):
-            raise ValueError('\033[1;31mTraining loss is nan\033[0m')
+            raise ValueError('Training loss is nan')

     def _generate_train_loss_output(self, epoch_idx, s_time, e_time, losses):
         des = self.config['loss_decimal_place'] or 4
-        train_loss_output = '\033[1;32mepoch %d training\033[0m [\033[1;34mtime\033[0m: %.2fs, ' % (epoch_idx, e_time - s_time)
+        train_loss_output = (set_color('epoch %d training', 'green') + ' [' + set_color('time', 'blue') + ': %.2fs, ') % (epoch_idx, e_time - s_time)
         if isinstance(losses, tuple):
-            des = '\033[1;34mtrain_loss%d\033[0m: %.' + str(des) + 'f'
+            des = (set_color('train_loss%d', 'blue') + ': %.' + str(des) + 'f')
             train_loss_output += ', '.join(des % (idx + 1, loss) for idx, loss in enumerate(losses))
         else:
             des = '%.' + str(des) + 'f'
-            train_loss_output += '\033[1;34mtrain loss\033[0m:' + des % losses
+            train_loss_output += set_color('train loss', 'blue') + ': ' + des % losses
         return train_loss_output + ']'

     def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progress=False, callback_fn=None):
@@ -272,7 +273,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
             if self.eval_step <= 0 or not valid_data:
                 if saved:
                     self._save_checkpoint(epoch_idx)
-                    update_output = '\033[1;32mSaving current\033[0m: %s' % self.saved_model_file
+                    update_output = set_color('Saving current', 'blue') + ': %s' % self.saved_model_file
                 if verbose:
                     self.logger.info(update_output)
                 continue
@@ -287,16 +288,17 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
                 bigger=self.valid_metric_bigger
             )
             valid_end_time = time()
-            valid_score_output = "\033[1;32mepoch %d evaluating\033[0m [\033[1;34mtime\033[0m: %.2fs, \033[1;34mvalid_score\033[0m: %f]" % \
+            valid_score_output = (set_color("epoch %d evaluating", 'green') + " [" + set_color("time", 'blue')
+                                  + ": %.2fs, " + set_color("valid_score", 'blue') + ": %f]") % \
                                  (epoch_idx, valid_end_time - valid_start_time, valid_score)
-            valid_result_output = '\033[1;34mvalid result\033[0m: \n' + dict2str(valid_result)
+            valid_result_output = set_color('valid result', 'blue') + ': \n' + dict2str(valid_result)
             if verbose:
                 self.logger.info(valid_score_output)
                 self.logger.info(valid_result_output)
             if update_flag:
                 if saved:
                     self._save_checkpoint(epoch_idx)
-                    update_output = '\033[1;34mSaving current best\033[0m: %s' % self.saved_model_file
+                    update_output = set_color('Saving current best', 'blue') + ': %s' % self.saved_model_file
                 if verbose:
                     self.logger.info(update_output)
                 self.best_valid_result = valid_result
@@ -380,7 +382,7 @@ def evaluate(self, eval_data, load_best_model=True, model_file=None, show_progre
             tqdm(
                 enumerate(eval_data),
                 total=len(eval_data),
-                desc=f"\033[1;35mEvaluate\033[0m ",
+                desc=set_color("Evaluate ", 'pink'),
             ) if show_progress else enumerate(eval_data)
         )
         for batch_idx, batched_data in iter_data:
@@ -541,7 +543,7 @@ def pretrain(self, train_data, verbose=True, show_progress=False):
                 '{}-{}-{}.pth'.format(self.config['model'], self.config['dataset'], str(epoch_idx + 1))
             )
             self.save_pretrained_model(epoch_idx, saved_model_file)
-            update_output = '\033[0;34mSaving current\033[0m: %s' % saved_model_file
+            update_output = set_color('Saving current', 'blue') + ': %s' % saved_model_file
             if verbose:
                 self.logger.info(update_output)

@@ -553,7 +555,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
         elif self.model.train_stage == 'finetune':
             return super().fit(train_data, valid_data, verbose, saved, show_progress, callback_fn)
         else:
-            raise ValueError("\033[1;31mPlease make sure that the 'train_stage' is 'pretrain' or 'finetune' \033[0m")
+            raise ValueError("Please make sure that the 'train_stage' is 'pretrain' or 'finetune'")


 class MKRTrainer(Trainer):
@@ -701,9 +703,10 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
             valid_start_time = time()
             valid_result, valid_score = self._valid_epoch(valid_data)
             valid_end_time = time()
-            valid_score_output = "\033[0;34mepoch %d evaluating [time\033[0m: %.2fs, valid_score: %f]" % \
+            valid_score_output = (set_color("epoch %d evaluating", 'green') + " [" + set_color("time", 'blue')
+                                  + ": %.2fs, " + set_color("valid_score", 'blue') + ": %f]") % \
                                  (epoch_idx, valid_end_time - valid_start_time, valid_score)
-            valid_result_output = '\033[0;34mvalid result\033[0m: \n' + dict2str(valid_result)
+            valid_result_output = set_color('valid result', 'blue') + ': \n' + dict2str(valid_result)
             if verbose:
                 self.logger.info(valid_score_output)
                 self.logger.info(valid_result_output)
@@ -868,7 +871,7 @@ def _train_epoch(self, train_data, epoch_idx, n_epochs, optimizer, encoder_flag,
             tqdm(
                 enumerate(train_data),
                 total=len(train_data),
-                desc=f"Train {epoch_idx:>5}",
+                desc=set_color("Train {:>5}", 'pink').format(epoch_idx),
             ) if show_progress else enumerate(train_data)
         )
         for epoch in range(n_epochs):
@@ -935,16 +938,16 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
                 bigger=self.valid_metric_bigger
             )
             valid_end_time = time()
-            valid_score_output = "epoch %d evaluating [time: %.2fs, valid_score: %f]" % \
+            valid_score_output = (set_color("epoch %d evaluating", 'blue') + " [" + set_color("time", 'blue') + ": %.2fs, " + set_color("valid_score", 'blue') + ": %f]") % \
                                  (epoch_idx, valid_end_time - valid_start_time, valid_score)
-            valid_result_output = 'valid result: \n' + dict2str(valid_result)
+            valid_result_output = set_color('valid result', 'blue') + ': \n' + dict2str(valid_result)
             if verbose:
                 self.logger.info(valid_score_output)
                 self.logger.info(valid_result_output)
             if update_flag:
                 if saved:
                     self._save_checkpoint(epoch_idx)
-                    update_output = 'Saving current best: %s' % self.saved_model_file
+                    update_output = set_color('Saving current best', 'blue') + ': %s' % self.saved_model_file
                 if verbose:
                     self.logger.info(update_output)
                 self.best_valid_result = valid_result
diff --git a/recbole/utils/utils.py b/recbole/utils/utils.py
index a6c29eef8..6070f2e57 100644
--- a/recbole/utils/utils.py
+++ b/recbole/utils/utils.py
@@ -3,6 +3,11 @@
 # @Author : Shanlei Mu
 # @Email : slmu@ruc.edu.cn

+# UPDATE
+# @Time : 2021/3/8
+# @Author : Jiawei Guan
+# @Email : guanjw@ruc.edu.cn
+
 """
 recbole.utils.utils
 ################################
@@ -65,7 +70,7 @@ def get_model(model_name):
             break

     if model_module is None:
-        raise ValueError('\033[1;31m`model_name` [{}] is not the name of an existing model.\033[0m'.format(model_name))
+        raise ValueError('`model_name` [{}] is not the name of an existing model.'.format(model_name))
     model_class = getattr(model_module, model_name)
     return model_class
@@ -185,3 +190,18 @@ def init_seed(seed, reproducibility):
     else:
         torch.backends.cudnn.benchmark = True
         torch.backends.cudnn.deterministic = False
+
+
+def set_color(log, color, highlight=True):
+    color_set = ['black', 'red', 'green', 'yellow', 'blue', 'pink', 'cyan', 'white']
+    try:
+        index = color_set.index(color)
+    except ValueError:
+        index = len(color_set) - 1
+    prev_log = '\033['
+    if highlight:
+        prev_log += '1;3'
+    else:
+        prev_log += '0;3'
+    prev_log += str(index) + 'm'
+    return prev_log + log + '\033[0m'
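[Editor's note, not part of the patch series] A minimal usage sketch of the set_color helper defined above, assuming only the eight-color table shown in the diff; the calls below are illustrative, not taken from the patches:

    from recbole.utils.utils import set_color

    print(set_color('Build', 'pink'))                    # '\033[1;35mBuild\033[0m', bold magenta
    print(set_color('warning', 'red', highlight=False))  # '\033[0;31mwarning\033[0m', non-bold red
    print(set_color('oops', 'not-a-color'))              # unknown color names fall back to white
    message = set_color('test result', 'yellow') + ': ok'  # escape codes live in the string, so it concatenates freely

Because the escape codes are embedded in the returned string, colored fragments can be concatenated with plain text before being handed to a logger, which is exactly how the calls throughout this series use it.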
""" - self.logger.debug(set_color('Loading {self.__class__} from scratch.', 'green')) + self.logger.debug(set_color('Loading {} from scratch.', 'green').format(self.__class__)) self._get_preset() self._get_field_from_config() @@ -335,7 +335,7 @@ def _get_load_and_unload_col(self, source): if load_col and unload_col: raise ValueError(f'load_col [{load_col}] and unload_col [{unload_col}] can not be set the same time.') - self.logger.debug(set_color('[{source}]: ', 'pink')) + self.logger.debug(set_color('[{}]: ', 'pink').format(source)) self.logger.debug(set_color('\t load_col', 'blue') + ': [{load_col}]') self.logger.debug(set_color('\t unload_col', 'blue') + ': [{unload_col}]') return load_col, unload_col @@ -357,7 +357,7 @@ def _load_feat(self, filepath, source): Their length is limited only after calling :meth:`~_dict_to_interaction` or :meth:`~_dataframe_to_interaction` """ - self.logger.debug(set_color('Loading feature from [{filepath}] (source: [{source}]).', 'green')) + self.logger.debug(set_color('Loading feature from [{}] (source: [{}]).', 'green').format(filepath, source)) load_col, unload_col = self._get_load_and_unload_col(source) if load_col == set(): diff --git a/recbole/evaluator/metrics.py b/recbole/evaluator/metrics.py index eada10cb5..b059903a4 100644 --- a/recbole/evaluator/metrics.py +++ b/recbole/evaluator/metrics.py @@ -306,4 +306,4 @@ def log_loss_(trues, preds): 'logloss': log_loss_, 'auc': auc_, 'gauc': gauc_ -} \ No newline at end of file +} From 2f7702793de9b0bf0d105fdb1aa0ba27dd724e9e Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Thu, 11 Mar 2021 10:32:00 +0800 Subject: [PATCH 15/16] replace format with f --- recbole/config/eval_setting.py | 8 +++---- recbole/data/dataset/dataset.py | 32 +++++++++++++-------------- recbole/data/dataset/kg_dataset.py | 10 ++++----- recbole/data/utils.py | 20 ++++++++--------- recbole/model/abstract_recommender.py | 2 +- recbole/quick_start/quick_start.py | 4 ++-- recbole/trainer/trainer.py | 4 ++-- recbole/utils/logger.py | 2 ++ requirements.txt | 3 ++- 9 files changed, 44 insertions(+), 41 deletions(-) diff --git a/recbole/config/eval_setting.py b/recbole/config/eval_setting.py index da11806c1..c67386ded 100644 --- a/recbole/config/eval_setting.py +++ b/recbole/config/eval_setting.py @@ -95,22 +95,22 @@ def __str__(self): info = [set_color('Evaluation Setting:', 'pink')] if self.group_field: - info.append(set_color('Group by', 'blue') + ' {}'.format(self.group_field)) + info.append(set_color('Group by', 'blue') + f' {self.group_field}') else: info.append(set_color('No Grouping', 'yellow')) if self.ordering_args is not None and self.ordering_args['strategy'] != 'none': - info.append(set_color('Ordering', 'blue') + ': {}'.format(self.ordering_args)) + info.append(set_color('Ordering', 'blue') + f': {self.ordering_args}') else: info.append(set_color('No Ordering', 'yellow')) if self.split_args is not None and self.split_args['strategy'] != 'none': - info.append(set_color('Splitting', 'blue') + ': {}'.format(self.split_args)) + info.append(set_color('Splitting', 'blue') + f': {self.split_args}') else: info.append(set_color('No Splitting', 'yellow')) if self.neg_sample_args is not None and self.neg_sample_args['strategy'] != 'none': - info.append(set_color('Negative Sampling', 'blue') + ': {}'.format(self.neg_sample_args)) + info.append(set_color('Negative Sampling', 'blue') + f': {self.neg_sample_args}') else: info.append(set_color('No Negative Sampling', 'yellow')) diff --git a/recbole/data/dataset/dataset.py 
From 2f7702793de9b0bf0d105fdb1aa0ba27dd724e9e Mon Sep 17 00:00:00 2001
From: Guan-JW <15692276873@163.com>
Date: Thu, 11 Mar 2021 10:32:00 +0800
Subject: [PATCH 15/16] replace format with f

---
 recbole/config/eval_setting.py        |  8 +++----
 recbole/data/dataset/dataset.py       | 32 +++++++++++++--------------
 recbole/data/dataset/kg_dataset.py    | 10 ++++-----
 recbole/data/utils.py                 | 20 ++++++++---------
 recbole/model/abstract_recommender.py |  2 +-
 recbole/quick_start/quick_start.py    |  4 ++--
 recbole/trainer/trainer.py            |  4 ++--
 recbole/utils/logger.py               |  2 ++
 requirements.txt                      |  3 ++-
 9 files changed, 44 insertions(+), 41 deletions(-)

diff --git a/recbole/config/eval_setting.py b/recbole/config/eval_setting.py
index da11806c1..c67386ded 100644
--- a/recbole/config/eval_setting.py
+++ b/recbole/config/eval_setting.py
@@ -95,22 +95,22 @@ def __str__(self):
         info = [set_color('Evaluation Setting:', 'pink')]

         if self.group_field:
-            info.append(set_color('Group by', 'blue') + ' {}'.format(self.group_field))
+            info.append(set_color('Group by', 'blue') + f' {self.group_field}')
         else:
             info.append(set_color('No Grouping', 'yellow'))

         if self.ordering_args is not None and self.ordering_args['strategy'] != 'none':
-            info.append(set_color('Ordering', 'blue') + ': {}'.format(self.ordering_args))
+            info.append(set_color('Ordering', 'blue') + f': {self.ordering_args}')
         else:
             info.append(set_color('No Ordering', 'yellow'))

         if self.split_args is not None and self.split_args['strategy'] != 'none':
-            info.append(set_color('Splitting', 'blue') + ': {}'.format(self.split_args))
+            info.append(set_color('Splitting', 'blue') + f': {self.split_args}')
         else:
             info.append(set_color('No Splitting', 'yellow'))

         if self.neg_sample_args is not None and self.neg_sample_args['strategy'] != 'none':
-            info.append(set_color('Negative Sampling', 'blue') + ': {}'.format(self.neg_sample_args))
+            info.append(set_color('Negative Sampling', 'blue') + f': {self.neg_sample_args}')
         else:
             info.append(set_color('No Negative Sampling', 'yellow'))
diff --git a/recbole/data/dataset/dataset.py b/recbole/data/dataset/dataset.py
index 177279d7e..3c423c8ed 100644
--- a/recbole/data/dataset/dataset.py
+++ b/recbole/data/dataset/dataset.py
@@ -102,7 +102,7 @@ def _from_scratch(self):
         """Load dataset from scratch.
         Initialize attributes firstly, then load data from atomic files, pre-process the dataset lastly.
         """
-        self.logger.debug(set_color('Loading {} from scratch.', 'green').format(self.__class__))
+        self.logger.debug(set_color(f'Loading {self.__class__} from scratch.', 'green'))

         self._get_preset()
         self._get_field_from_config()
@@ -135,8 +135,8 @@ def _get_field_from_config(self):
             'USER_ID_FIELD and ITEM_ID_FIELD need to be set at the same time or not set at the same time.'
         )

-        self.logger.debug(set_color('uid_field', 'blue') + ': {self.uid_field}')
-        self.logger.debug(set_color('iid_field', 'blue') + ': {self.iid_field}')
+        self.logger.debug(set_color('uid_field', 'blue') + f': {self.uid_field}')
+        self.logger.debug(set_color('iid_field', 'blue') + f': {self.iid_field}')

     def _data_processing(self):
         """Data preprocessing, including:
@@ -335,9 +335,9 @@ def _get_load_and_unload_col(self, source):
         if load_col and unload_col:
             raise ValueError(f'load_col [{load_col}] and unload_col [{unload_col}] can not be set the same time.')

-        self.logger.debug(set_color('[{}]: ', 'pink').format(source))
-        self.logger.debug(set_color('\t load_col', 'blue') + ': [{load_col}]')
-        self.logger.debug(set_color('\t unload_col', 'blue') + ': [{unload_col}]')
+        self.logger.debug(set_color(f'[{source}]: ', 'pink'))
+        self.logger.debug(set_color('\t load_col', 'blue') + f': [{load_col}]')
+        self.logger.debug(set_color('\t unload_col', 'blue') + f': [{unload_col}]')
         return load_col, unload_col

     def _load_feat(self, filepath, source):
@@ -357,7 +357,7 @@ def _load_feat(self, filepath, source):
             Their length is limited only after calling :meth:`~_dict_to_interaction` or :meth:`~_dataframe_to_interaction`
         """
-        self.logger.debug(set_color('Loading feature from [{}] (source: [{}]).', 'green').format(filepath, source))
+        self.logger.debug(set_color(f'Loading feature from [{filepath}] (source: [{source}]).', 'green'))
         load_col, unload_col = self._get_load_and_unload_col(source)

         if load_col == set():
@@ -529,7 +529,7 @@ def _normalize(self):
         else:
             return

-        self.logger.debug(set_color('Normalized fields', 'blue') + ': {fields}')
+        self.logger.debug(set_color('Normalized fields', 'blue') + f': {fields}')

         for feat_name in self.feat_name_list:
             feat = getattr(self, feat_name)
@@ -684,7 +684,7 @@ def _get_illegal_ids_by_inter_num(self, field, feat, inter_num, max_num=None, mi
         Returns:
             set: illegal ids, whose inter num out of [min_num, max_num]
         """
-        self.logger.debug(set_color('get_illegal_ids_by_inter_num', 'blue') + ': field=[{field}], max_num=[{max_num}], min_num=[{min_num}]')
+        self.logger.debug(set_color('get_illegal_ids_by_inter_num', 'blue') + f': field=[{field}], max_num=[{max_num}], min_num=[{min_num}]')

         max_num = max_num or np.inf
         min_num = min_num or -1
@@ -729,7 +729,7 @@ def _drop_by_value(self, val, cmp):
         if val is None:
             return []

-        self.logger.debug(set_color('drop_by_value', 'blue') + ': val={val}')
+        self.logger.debug(set_color('drop_by_value', 'blue') + f': val={val}')
         filter_field = []
         for field in val:
             if field not in self.field2type:
@@ -877,7 +877,7 @@ def _remap_ID_all(self):
         """Get ``config['fields_in_same_space']`` firstly, and remap each.
""" fields_in_same_space = self._get_fields_in_same_space() - self.logger.debug(set_color('fields_in_same_space', 'blue') + ': {fields_in_same_space}') + self.logger.debug(set_color('fields_in_same_space', 'blue') + f': {fields_in_same_space}') for field_set in fields_in_same_space: remap_list = self._get_remap_list(field_set) self._remap(remap_list) @@ -1178,16 +1178,16 @@ def __str__(self): info = [set_color(self.dataset_name, 'pink')] if self.uid_field: info.extend([ - set_color('The number of users', 'blue') + ': {self.user_num}', set_color('Average actions of users', 'blue') + ': {self.avg_actions_of_users}' + set_color('The number of users', 'blue') + f': {self.user_num}', set_color('Average actions of users', 'blue') + f': {self.avg_actions_of_users}' ]) if self.iid_field: info.extend([ - set_color('The number of items', 'blue') + ': {self.item_num}', set_color('Average actions of items', 'blue') + ': {self.avg_actions_of_items}' + set_color('The number of items', 'blue') + f': {self.item_num}', set_color('Average actions of items', 'blue') + f': {self.avg_actions_of_items}' ]) - info.append(set_color('The number of inters', 'blue') + ': {self.inter_num}') + info.append(set_color('The number of inters', 'blue') + f': {self.inter_num}') if self.uid_field and self.iid_field: - info.append(set_color('The sparsity of the dataset', 'blue') + ': {self.sparsity * 100}%') - info.append(set_color('Remain Fields', 'blue') + ': {list(self.field2type)}') + info.append(set_color('The sparsity of the dataset', 'blue') + f': {self.sparsity * 100}%') + info.append(set_color('Remain Fields', 'blue') + f': {list(self.field2type)}') return '\n'.join(info) def copy(self, new_inter_feat): diff --git a/recbole/data/dataset/kg_dataset.py b/recbole/data/dataset/kg_dataset.py index dd193aaad..0bbf49635 100644 --- a/recbole/data/dataset/kg_dataset.py +++ b/recbole/data/dataset/kg_dataset.py @@ -81,8 +81,8 @@ def _get_field_from_config(self): self._check_field('head_entity_field', 'tail_entity_field', 'relation_field', 'entity_field') self.set_field_property(self.entity_field, FeatureType.TOKEN, FeatureSource.KG, 1) - self.logger.debug(set_color('relation_field', 'blue') + ': {self.relation_field}') - self.logger.debug(set_color('entity_field', 'blue') + ': {self.entity_field}') + self.logger.debug(set_color('relation_field', 'blue') + f': {self.relation_field}') + self.logger.debug(set_color('entity_field', 'blue') + f': {self.entity_field}') def _data_processing(self): self._set_field2ent_level() @@ -139,7 +139,7 @@ def save(self, filepath): raise NotImplementedError() def _load_kg(self, token, dataset_path): - self.logger.debug(set_color('Loading kg from [{}].', 'green').format(dataset_path)) + self.logger.debug(set_color(f'Loading kg from [{dataset_path}].', 'green')) kg_path = os.path.join(dataset_path, f'{token}.kg') if not os.path.isfile(kg_path): raise ValueError('[{token}.kg] not found in [{dataset_path}].') @@ -154,7 +154,7 @@ def _check_kg(self, kg): assert self.relation_field in kg, kg_warn_message.format(self.relation_field) def _load_link(self, token, dataset_path): - self.logger.debug(set_color('Loading link from [{}].', 'green').format(dataset_path)) + self.logger.debug(set_color(f'Loading link from [{dataset_path}].', 'green')) link_path = os.path.join(dataset_path, f'{token}.link') if not os.path.isfile(link_path): raise ValueError(f'[{token}.link] not found in [{dataset_path}].') @@ -208,7 +208,7 @@ def _get_ent_fields_in_same_space(self): if self._contain_ent_field(field_set): field_set = 
self._remove_ent_field(field_set) ent_fields.update(field_set) - self.logger.debug(set_color('ent_fields', 'blue') + ': {fields_in_same_space}') + self.logger.debug(set_color('ent_fields', 'blue') + f': {fields_in_same_space}') return ent_fields def _remove_ent_field(self, field_set): diff --git a/recbole/data/utils.py b/recbole/data/utils.py index 75c9e48ca..17d2a5cf8 100644 --- a/recbole/data/utils.py +++ b/recbole/data/utils.py @@ -104,16 +104,16 @@ def data_preparation(config, dataset, save=False): train_kwargs['kg_sampler'] = kg_sampler dataloader = get_data_loader('train', config, train_neg_sample_args) - logger.info((set_color('Build', 'pink') + set_color(' [{}]', 'yellow') + logger.info(set_color('Build', 'pink') + set_color(f' [{dataloader.__name__}]', 'yellow') + ' for ' + set_color('[train]', 'yellow') + ' with format ' - + set_color('[{}]', 'yellow')).format(dataloader.__name__, train_kwargs["dl_format"])) + + set_color(f'[{train_kwargs["dl_format"]}]', 'yellow')) if train_neg_sample_args['strategy'] != 'none': - logger.info(set_color('[train]', 'pink') + set_color(' Negative Sampling', 'blue') + ': {train_neg_sample_args}') + logger.info(set_color('[train]', 'pink') + set_color(' Negative Sampling', 'blue') + f': {train_neg_sample_args}') else: logger.info(set_color('[train]', 'pink') + set_color(' No Negative Sampling', 'yellow')) logger.info(set_color('[train]', 'pink') + set_color(' batch_size', 'cyan') + ' = ' - + set_color('[{train_kwargs["batch_size"]}]', 'yellow') + ', ' - + set_color('shuffle', 'cyan') + ' = ' + set_color('[{train_kwargs["shuffle"]}]\n', 'yellow')) + + set_color(f'[{train_kwargs["batch_size"]}]', 'yellow') + ', ' + + set_color('shuffle', 'cyan') + ' = ' + set_color(f'[{train_kwargs["shuffle"]}]\n', 'yellow')) train_data = dataloader(**train_kwargs) # Evaluation @@ -145,13 +145,13 @@ def data_preparation(config, dataset, save=False): test_kwargs.update(eval_kwargs) dataloader = get_data_loader('evaluation', config, eval_neg_sample_args) - logger.info((set_color('Build', 'pink') + set_color(' [{}]', 'yellow') + logger.info(set_color('Build', 'pink') + set_color(f' [{dataloader.__name__}]', 'yellow') + ' for ' + set_color('[evaluation]', 'yellow') + ' with format ' - + set_color('[{}]', 'yellow')).format(dataloader.__name__, eval_kwargs["dl_format"])) + + set_color(f'[{eval_kwargs["dl_format"]}]', 'yellow')) logger.info(es) - logger.info((set_color('[evaluation]', 'pink') + set_color(' batch_size', 'cyan') + ' = ' - + set_color('[{}]', 'yellow') + ', ' + set_color('shuffle', 'cyan') + ' = ' - + set_color('[{}]\n', 'yellow')).format(eval_kwargs["batch_size"], eval_kwargs["shuffle"])) + logger.info(set_color('[evaluation]', 'pink') + set_color(' batch_size', 'cyan') + ' = ' + + set_color(f'[{eval_kwargs["batch_size"]}]', 'yellow') + ', ' + set_color('shuffle', 'cyan') + ' = ' + + set_color(f'[{eval_kwargs["shuffle"]}]\n', 'yellow')) valid_data = dataloader(**valid_kwargs) test_data = dataloader(**test_kwargs) diff --git a/recbole/model/abstract_recommender.py b/recbole/model/abstract_recommender.py index b677cf909..d9a4d7be8 100644 --- a/recbole/model/abstract_recommender.py +++ b/recbole/model/abstract_recommender.py @@ -72,7 +72,7 @@ def __str__(self): """ model_parameters = filter(lambda p: p.requires_grad, self.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) - return super().__str__() + set_color('\nTrainable parameters', 'blue') + ': {}'.format(params) + return super().__str__() + set_color('\nTrainable parameters', 'blue') + 
f': {params}' class GeneralRecommender(AbstractRecommender): diff --git a/recbole/quick_start/quick_start.py b/recbole/quick_start/quick_start.py index aecaf61e0..2072d110c 100644 --- a/recbole/quick_start/quick_start.py +++ b/recbole/quick_start/quick_start.py @@ -58,8 +58,8 @@ def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=Non # model evaluation test_result = trainer.evaluate(test_data, load_best_model=saved, show_progress=config['show_progress']) - logger.info(set_color('best valid ', 'yellow') + ': {}'.format(best_valid_result)) - logger.info(set_color('test result', 'yellow') + ': {}'.format(test_result)) + logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}') + logger.info(set_color('test result', 'yellow') + f': {test_result}') return { 'best_valid_score': best_valid_score, diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py index b16eb943c..51f54f0ce 100644 --- a/recbole/trainer/trainer.py +++ b/recbole/trainer/trainer.py @@ -145,7 +145,7 @@ def _train_epoch(self, train_data, epoch_idx, loss_func=None, show_progress=Fals tqdm( enumerate(train_data), total=len(train_data), - desc=set_color("Train {:>5}", 'pink').format(epoch_idx), + desc=set_color(f"Train {epoch_idx:>5}", 'pink'), ) if show_progress else enumerate(train_data) ) for batch_idx, interaction in iter_data: @@ -871,7 +871,7 @@ def _train_epoch(self, train_data, epoch_idx, n_epochs, optimizer, encoder_flag, tqdm( enumerate(train_data), total=len(train_data), - desc=set_color("Train {:>5}", 'pink').format(epoch_idx), + desc=set_color(f"Train {epoch_idx:>5}", 'pink'), ) if show_progress else enumerate(train_data) ) for epoch in range(n_epochs): diff --git a/recbole/utils/logger.py b/recbole/utils/logger.py index f3afbd3c9..7461a584f 100644 --- a/recbole/utils/logger.py +++ b/recbole/utils/logger.py @@ -18,6 +18,7 @@ import colorlog from recbole.utils.utils import get_local_time, ensure_dir +from colorama import init log_colors_config = { 'DEBUG': 'cyan', @@ -40,6 +41,7 @@ def init_logger(config): >>> logger.debug(train_state) >>> logger.info(train_result) """ + init(autoreset=True) LOGROOT = './log/' dir_name = os.path.dirname(LOGROOT) ensure_dir(dir_name) diff --git a/requirements.txt b/requirements.txt index faf49dc2d..9cf90921e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ pandas>=1.0.5 tqdm>=4.48.2 scikit_learn>=0.23.2 pyyaml>=5.1.0 -colorlog \ No newline at end of file +colorlog==4.7.2 +colorama==0.4.4 \ No newline at end of file From 0cf66577a51330f89795c6f42ef5c922a7030321 Mon Sep 17 00:00:00 2001 From: Guan-JW <15692276873@163.com> Date: Thu, 11 Mar 2021 10:58:30 +0800 Subject: [PATCH 16/16] replace format with f --- recbole/data/dataset/kg_dataset.py | 2 +- recbole/data/dataset/social_dataset.py | 4 ++-- recbole/trainer/trainer.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/recbole/data/dataset/kg_dataset.py b/recbole/data/dataset/kg_dataset.py index 0bbf49635..3eb5a0873 100644 --- a/recbole/data/dataset/kg_dataset.py +++ b/recbole/data/dataset/kg_dataset.py @@ -142,7 +142,7 @@ def _load_kg(self, token, dataset_path): self.logger.debug(set_color(f'Loading kg from [{dataset_path}].', 'green')) kg_path = os.path.join(dataset_path, f'{token}.kg') if not os.path.isfile(kg_path): - raise ValueError('[{token}.kg] not found in [{dataset_path}].') + raise ValueError(f'[{token}.kg] not found in [{dataset_path}].') df = self._load_feat(kg_path, FeatureSource.KG) self._check_kg(df) return df diff --git 
a/recbole/data/dataset/social_dataset.py b/recbole/data/dataset/social_dataset.py index 66d1c050f..4b2ea104a 100644 --- a/recbole/data/dataset/social_dataset.py +++ b/recbole/data/dataset/social_dataset.py @@ -45,8 +45,8 @@ def _get_field_from_config(self): self.target_field = self.config['TARGET_ID_FIELD'] self._check_field('source_field', 'target_field') - self.logger.debug(set_color('source_id_field', 'blue') + ': {self.source_field}') - self.logger.debug(set_color('target_id_field', 'blue') + ': {self.target_field}') + self.logger.debug(set_color('source_id_field', 'blue') + f': {self.source_field}') + self.logger.debug(set_color('target_id_field', 'blue') + f': {self.target_field}') def _load_data(self, token, dataset_path): """Load ``.net`` additionally. diff --git a/recbole/trainer/trainer.py b/recbole/trainer/trainer.py index 51f54f0ce..34961d117 100644 --- a/recbole/trainer/trainer.py +++ b/recbole/trainer/trainer.py @@ -382,7 +382,7 @@ def evaluate(self, eval_data, load_best_model=True, model_file=None, show_progre tqdm( enumerate(eval_data), total=len(eval_data), - desc=set_color("Evaluate ", 'pink'), + desc=set_color(f"Evaluate ", 'pink'), ) if show_progress else enumerate(eval_data) ) for batch_idx, batched_data in iter_data:
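
Editor's note: every hunk in this series routes its coloring through a
`set_color(text, color)` helper that is defined elsewhere in the codebase and
never shown in these patches. The sketch below is illustrative only, assuming
standard ANSI SGR escape codes and a color-name ordering chosen here for the
example; it is not the project's verbatim implementation.

    # Hypothetical sketch of a set_color-style helper (not RecBole's actual code).
    # Wraps `log` in an ANSI SGR escape sequence, e.g.
    # set_color('test result', 'yellow') -> '\033[1;33mtest result\033[0m'.
    def set_color(log, color, highlight=True):
        color_set = ['black', 'red', 'green', 'yellow', 'blue', 'pink', 'cyan', 'white']
        try:
            index = color_set.index(color)
        except ValueError:
            index = len(color_set) - 1  # unknown color names fall back to white
        prefix = '\033[' + ('1;' if highlight else '0;') + str(index + 30) + 'm'
        return prefix + log + '\033[0m'

With colorama initialised via `init(autoreset=True)`, as added in the
logger.py hunk above, colors are reset after each emitted line, so an
unclosed escape sequence cannot bleed into subsequent log output.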