From 02e971a83658eb9cf9e89390956b054b68b42f30 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 4 Dec 2020 19:37:08 -0500 Subject: [PATCH 1/4] add interaction feature for history items --- .../data/dataloader/sequential_dataloader.py | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/recbole/data/dataloader/sequential_dataloader.py b/recbole/data/dataloader/sequential_dataloader.py index d1030ef11..33de8f37a 100644 --- a/recbole/data/dataloader/sequential_dataloader.py +++ b/recbole/data/dataloader/sequential_dataloader.py @@ -14,6 +14,7 @@ import numpy as np import torch +from collections import Iterable from recbole.data.dataloader.abstract_dataloader import AbstractDataLoader from recbole.data.dataloader.neg_sample_mixin import NegSampleByMixin @@ -57,6 +58,21 @@ def __init__(self, config, dataset, self.target_time_field = self.time_field self.item_list_length_field = config['ITEM_LIST_LENGTH_FIELD'] + for field in dataset.inter_feat: + if field != self.iid_field and field != self.time_field: + ftype = dataset.field2type[field] + setattr(self, f'{field}_list_field', field + list_suffix) + if dataset.field2type[field] == FeatureType.TOKEN: + dataset.set_field_property(getattr(self, f'{field}_list_field'), FeatureType.TOKEN_SEQ, + FeatureSource.INTERACTION, + self.max_item_list_len) + elif dataset.field2type[field] == FeatureType.FLOAT: + dataset.set_field_property(getattr(self, f'{field}_list_field'), FeatureType.FLOAT_SEQ, + FeatureSource.INTERACTION, + self.max_item_list_len) + else: + raise NotImplementedError('Field with ftype [{}] is not implemented for sequential model'.format(ftype)) + dataset.set_field_property(self.item_list_field, FeatureType.TOKEN_SEQ, FeatureSource.INTERACTION, self.max_item_list_len) dataset.set_field_property(self.time_list_field, FeatureType.FLOAT_SEQ, FeatureSource.INTERACTION, @@ -136,6 +152,18 @@ def augmentation(self, uid_list, item_list_index, target_index, item_list_length for field in 
self.dataset.inter_feat: if field != self.iid_field and field != self.time_field: new_dict[field] = self.dataset.inter_feat[field][target_index].values + """Add extra field feature for interaction""" + ftype = self.dataset.field2type[field] + if ftype == FeatureType.TOKEN or ftype == FeatureType.FLOAT: + field_value = self.dataset.inter_feat[field] + dtype = np.int64 if ftype == FeatureType.TOKEN else np.float32 + new_dict[getattr(self, f'{field}_list_field')] = np.zeros((new_length, self.max_item_list_len), + dtype=dtype) + for i, (index, length) in enumerate(zip(item_list_index, item_list_length)): + new_dict[getattr(self, f'{field}_list_field')][i][:length] = field_value[index] + else: + raise NotImplementedError('Field with ftype [{}] is not implemented for sequential model'.format(ftype)) + if self.position_field: new_dict[self.position_field] = np.tile(np.arange(self.max_item_list_len), (new_length, 1)) @@ -284,4 +312,4 @@ def get_pos_len_list(self): Returns: np.ndarray or list: Number of positive item for each user in a training/evaluating epoch. 
""" - return np.ones(self.pr_end, dtype=np.int64) + return np.ones(self.pr_end, dtype=np.int64) \ No newline at end of file From 6d39987dca910000a64ca6e77a9634449250ce6f Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 4 Dec 2020 19:40:38 -0500 Subject: [PATCH 2/4] remove unused Iterable import from sequential dataloader --- recbole/data/dataloader/sequential_dataloader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/recbole/data/dataloader/sequential_dataloader.py b/recbole/data/dataloader/sequential_dataloader.py index 33de8f37a..cc4df34ea 100644 --- a/recbole/data/dataloader/sequential_dataloader.py +++ b/recbole/data/dataloader/sequential_dataloader.py @@ -14,7 +14,6 @@ import numpy as np import torch -from collections import Iterable from recbole.data.dataloader.abstract_dataloader import AbstractDataLoader from recbole.data.dataloader.neg_sample_mixin import NegSampleByMixin From 8c7bf37a251812767d353b244e0ad20229113406 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 4 Dec 2020 22:54:55 -0500 Subject: [PATCH 3/4] exclude the item/time fields via a membership test, and treat the rest as interaction features --- recbole/data/dataloader/sequential_dataloader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recbole/data/dataloader/sequential_dataloader.py b/recbole/data/dataloader/sequential_dataloader.py index cc4df34ea..948208e1c 100644 --- a/recbole/data/dataloader/sequential_dataloader.py +++ b/recbole/data/dataloader/sequential_dataloader.py @@ -58,7 +58,7 @@ def __init__(self, config, dataset, self.item_list_length_field = config['ITEM_LIST_LENGTH_FIELD'] for field in dataset.inter_feat: - if field != self.iid_field and field != self.time_field: + if field not in [self.iid_field, self.time_field]: ftype = dataset.field2type[field] setattr(self, f'{field}_list_field', field + list_suffix) if dataset.field2type[field] == FeatureType.TOKEN: @@ -149,7 +149,7 @@ def augmentation(self, uid_list, item_list_index, target_index, item_list_length self.item_list_length_field: 
item_list_length, } for field in self.dataset.inter_feat: - if field != self.iid_field and field != self.time_field: + if field not in [self.iid_field, self.time_field]: new_dict[field] = self.dataset.inter_feat[field][target_index].values """Add extra field feature for interaction""" ftype = self.dataset.field2type[field] From 71af89c73f1ddf879799cc6927f78a819bd41947 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 4 Dec 2020 23:09:19 -0500 Subject: [PATCH 4/4] exclude the user/item/time field, and treat rest as interaction features --- recbole/data/dataloader/sequential_dataloader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recbole/data/dataloader/sequential_dataloader.py b/recbole/data/dataloader/sequential_dataloader.py index 948208e1c..b8e9dd64b 100644 --- a/recbole/data/dataloader/sequential_dataloader.py +++ b/recbole/data/dataloader/sequential_dataloader.py @@ -58,7 +58,7 @@ def __init__(self, config, dataset, self.item_list_length_field = config['ITEM_LIST_LENGTH_FIELD'] for field in dataset.inter_feat: - if field not in [self.iid_field, self.time_field]: + if field not in [self.uid_field, self.iid_field, self.time_field]: ftype = dataset.field2type[field] setattr(self, f'{field}_list_field', field + list_suffix) if dataset.field2type[field] == FeatureType.TOKEN: @@ -149,7 +149,7 @@ def augmentation(self, uid_list, item_list_index, target_index, item_list_length self.item_list_length_field: item_list_length, } for field in self.dataset.inter_feat: - if field not in [self.iid_field, self.time_field]: + if field not in [self.uid_field, self.iid_field, self.time_field]: new_dict[field] = self.dataset.inter_feat[field][target_index].values """Add extra field feature for interaction""" ftype = self.dataset.field2type[field]