From 0167ac82f12471da6898e9128d430b7af4be4d5a Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Thu, 8 Jul 2021 03:14:31 +0000
Subject: [PATCH 01/12] FEA: add config 'augmentation'

---
 recbole/config/configurator.py                        | 11 ++++++++++-
 recbole/data/dataset/sequential_dataset.py            |  9 ++++++---
 recbole/properties/dataset/sample.yaml                |  1 +
 recbole/properties/quick_start_config/sequential.yaml |  2 +-
 .../properties/quick_start_config/sequential_DIN.yaml |  3 ++-
 recbole/utils/argument_list.py                        |  1 +
 6 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/recbole/config/configurator.py b/recbole/config/configurator.py
index f2e15b21d..70b84b0b2 100644
--- a/recbole/config/configurator.py
+++ b/recbole/config/configurator.py
@@ -3,7 +3,7 @@
 # @Email  : linzihan.super@foxmail.com
 
 # UPDATE
-# @Time   : 2020/10/04, 2021/3/2, 2021/2/17, 2021/6/30
+# @Time   : 2020/10/04, 2021/7/8, 2021/2/17, 2021/6/30
 # @Author : Shanlei Mu, Yupeng Hou, Jiawei Guan, Xingyu Pan
 # @Email  : slmu@ruc.edu.cn, houyupeng@ruc.edu.cn, Guanjw@ruc.edu.cn, xy_pan@foxmail.com
 
@@ -284,6 +284,15 @@ def _set_default_parameters(self):
         else:
             raise ValueError('Either Model has attr \'input_type\',' 'or arg \'loss_type\' should exist in config.')
 
+        if self.final_config_dict['MODEL_TYPE'] == ModelType.SEQUENTIAL and \
+           self.final_config_dict['benchmark_filename'] is not None and \
+           self.final_config_dict['augmentation']:
+            raise ValueError(
+                f'Benchmark datasets for sequential model {self.model} '
+                f'should be augmented in advance, which should not be augmented again and '
+                f'config \'augmentation\' should be False.'
+            )
+
         eval_type = None
         for metric in self.final_config_dict['metrics']:
             if metric.lower() in individual_metrics:
diff --git a/recbole/data/dataset/sequential_dataset.py b/recbole/data/dataset/sequential_dataset.py
index 706e7d6f6..f56901612 100644
--- a/recbole/data/dataset/sequential_dataset.py
+++ b/recbole/data/dataset/sequential_dataset.py
@@ -3,9 +3,9 @@
 # @Email  : chenyushuo@ruc.edu.cn
 
 # UPDATE:
-# @Time   : 2020/9/16, 2021/7/1
-# @Author : Yushuo Chen, Xingyu Pan
-# @Email  : chenyushuo@ruc.edu.cn, xy_pan@foxmail.com
+# @Time   : 2020/9/16, 2021/7/1, 2021/7/8
+# @Author : Yushuo Chen, Xingyu Pan, Yupeng Hou
+# @Email  : chenyushuo@ruc.edu.cn, xy_pan@foxmail.com, houyupeng@ruc.edu.cn
 
 """
 recbole.data.sequential_dataset
@@ -25,6 +25,8 @@ class SequentialDataset(Dataset):
     which can accelerate the data loader.
 
     Attributes:
+        augmentation (bool): Whether the interactions should be augmented in RecBole.
+
         uid_list (numpy.ndarray): List of user id after augmentation.
 
         item_list_index (numpy.ndarray): List of indexes of item sequence after augmentation.
@@ -36,6 +38,7 @@ class SequentialDataset(Dataset):
     """
 
     def __init__(self, config):
+        self.augmentation = config['augmentation']
         super().__init__(config)
 
     def prepare_data_augmentation(self):
diff --git a/recbole/properties/dataset/sample.yaml b/recbole/properties/dataset/sample.yaml
index d9869e5e2..ff4ac4915 100644
--- a/recbole/properties/dataset/sample.yaml
+++ b/recbole/properties/dataset/sample.yaml
@@ -43,6 +43,7 @@ normalize_field: ~
 normalize_all: ~
 
 # Sequential Model Needed
+augmentation: False
 ITEM_LIST_LENGTH_FIELD: item_length
 LIST_SUFFIX: _list
 MAX_ITEM_LIST_LENGTH: 50
diff --git a/recbole/properties/quick_start_config/sequential.yaml b/recbole/properties/quick_start_config/sequential.yaml
index 04e591aa2..2370f18b4 100644
--- a/recbole/properties/quick_start_config/sequential.yaml
+++ b/recbole/properties/quick_start_config/sequential.yaml
@@ -2,4 +2,4 @@ eval_args:
   split: {'LS': 2}
   order: TO
   mode: full
-
+augmentation: True
diff --git a/recbole/properties/quick_start_config/sequential_DIN.yaml b/recbole/properties/quick_start_config/sequential_DIN.yaml
index 398e2ff43..3e7f4a681 100644
--- a/recbole/properties/quick_start_config/sequential_DIN.yaml
+++ b/recbole/properties/quick_start_config/sequential_DIN.yaml
@@ -3,4 +3,5 @@ eval_args:
   order: TO
   mode: uni100
 metrics: ['AUC', 'LogLoss']
-valid_metric: AUC
\ No newline at end of file
+valid_metric: AUC
+augmentation: True
\ No newline at end of file
diff --git a/recbole/utils/argument_list.py b/recbole/utils/argument_list.py
index 632f442bd..31b719989 100644
--- a/recbole/utils/argument_list.py
+++ b/recbole/utils/argument_list.py
@@ -45,6 +45,7 @@
     'ITEM_LIST_LENGTH_FIELD', 'LIST_SUFFIX', 'MAX_ITEM_LIST_LENGTH', 'POSITION_FIELD',
     'HEAD_ENTITY_ID_FIELD', 'TAIL_ENTITY_ID_FIELD', 'RELATION_ID_FIELD', 'ENTITY_ID_FIELD',
     'load_col', 'unload_col', 'unused_col', 'additional_feat_suffix',
+    'augmentation',
     'max_user_inter_num', 'min_user_inter_num', 'max_item_inter_num', 'min_item_inter_num',
     'lowest_val', 'highest_val', 'equal_val', 'not_equal_val',
     'fields_in_same_space',

From 4fc613bc7af01c1110cded5e4153339783110f31 Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Thu, 8 Jul 2021 13:11:31 +0000
Subject: [PATCH 02/12] REFACTOR: real augmentation for seq rec

---
 .../data/dataloader/sequential_dataloader.py  | 118 ++------------
 recbole/data/dataset/sequential_dataset.py    | 147 ++++++++++--------
 2 files changed, 92 insertions(+), 173 deletions(-)

diff --git a/recbole/data/dataloader/sequential_dataloader.py b/recbole/data/dataloader/sequential_dataloader.py
index ad962a329..258a1814f 100644
--- a/recbole/data/dataloader/sequential_dataloader.py
+++ b/recbole/data/dataloader/sequential_dataloader.py
@@ -3,7 +3,7 @@
 # @Email  : houyupeng@ruc.edu.cn
 
 # UPDATE
-# @Time   : 2020/10/6, 2020/9/17
+# @Time   : 2021/7/8, 2020/9/17
 # @Author : Yupeng Hou, Yushuo Chen
 # @email  : houyupeng@ruc.edu.cn, chenyushuo@ruc.edu.cn
 
@@ -15,15 +15,15 @@
 import numpy as np
 import torch
 
-from recbole.data.dataloader.abstract_dataloader import AbstractDataLoader
+from recbole.data.dataloader.general_dataloader import GeneralDataLoader
 from recbole.data.dataloader.neg_sample_mixin import NegSampleByMixin, NegSampleMixin
 from recbole.data.interaction import Interaction, cat_interactions
-from recbole.utils import DataLoaderType, FeatureSource, FeatureType, InputType
+from recbole.utils import DataLoaderType, InputType
 
 
-class SequentialDataLoader(AbstractDataLoader):
-    """:class:`SequentialDataLoader` is used for sequential model. It will do data augmentation for the origin data.
-    And its returned data contains the following:
+class SequentialDataLoader(GeneralDataLoader):
+    """:class:`SequentialDataLoader` is used for sequential model.
+    It contains the following:
 
         - user id
         - history items list
@@ -41,109 +41,7 @@ class SequentialDataLoader(AbstractDataLoader):
             :obj:`~recbole.utils.enum_type.InputType.POINTWISE`.
         shuffle (bool, optional): Whether the dataloader will be shuffle after a round. Defaults to ``False``.
     """
-    dl_type = DataLoaderType.ORIGIN
-
-    def __init__(self, config, dataset, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False):
-        self.uid_field = dataset.uid_field
-        self.iid_field = dataset.iid_field
-        self.time_field = dataset.time_field
-        self.max_item_list_len = config['MAX_ITEM_LIST_LENGTH']
-
-        list_suffix = config['LIST_SUFFIX']
-        for field in dataset.inter_feat:
-            if field != self.uid_field:
-                list_field = field + list_suffix
-                setattr(self, f'{field}_list_field', list_field)
-                ftype = dataset.field2type[field]
-
-                if ftype in [FeatureType.TOKEN, FeatureType.TOKEN_SEQ]:
-                    list_ftype = FeatureType.TOKEN_SEQ
-                else:
-                    list_ftype = FeatureType.FLOAT_SEQ
-
-                if ftype in [FeatureType.TOKEN_SEQ, FeatureType.FLOAT_SEQ]:
-                    list_len = (self.max_item_list_len, dataset.field2seqlen[field])
-                else:
-                    list_len = self.max_item_list_len
-
-                dataset.set_field_property(list_field, list_ftype, FeatureSource.INTERACTION, list_len)
-
-        self.item_list_length_field = config['ITEM_LIST_LENGTH_FIELD']
-        dataset.set_field_property(self.item_list_length_field, FeatureType.TOKEN, FeatureSource.INTERACTION, 1)
-
-        self.uid_list = dataset.uid_list
-        self.item_list_index = dataset.item_list_index
-        self.target_index = dataset.target_index
-        self.item_list_length = dataset.item_list_length
-        self.pre_processed_data = None
-
-        super().__init__(config, dataset, batch_size=batch_size, dl_format=dl_format, shuffle=shuffle)
-
-    def data_preprocess(self):
-        """Do data augmentation before training/evaluation.
-        """
-        self.pre_processed_data = self.augmentation(self.item_list_index, self.target_index, self.item_list_length)
-
-    @property
-    def pr_end(self):
-        return len(self.uid_list)
-
-    def _shuffle(self):
-        if self.real_time:
-            new_index = torch.randperm(self.pr_end)
-            self.uid_list = self.uid_list[new_index]
-            self.item_list_index = self.item_list_index[new_index]
-            self.target_index = self.target_index[new_index]
-            self.item_list_length = self.item_list_length[new_index]
-        else:
-            self.pre_processed_data.shuffle()
-
-    def _next_batch_data(self):
-        cur_data = self._get_processed_data(slice(self.pr, self.pr + self.step))
-        self.pr += self.step
-        return cur_data
-
-    def _get_processed_data(self, index):
-        if self.real_time:
-            cur_data = self.augmentation(
-                self.item_list_index[index], self.target_index[index], self.item_list_length[index]
-            )
-        else:
-            cur_data = self.pre_processed_data[index]
-        return cur_data
-
-    def augmentation(self, item_list_index, target_index, item_list_length):
-        """Data augmentation.
-
-        Args:
-            item_list_index (numpy.ndarray): the index of history items list in interaction.
-            target_index (numpy.ndarray): the index of items to be predicted in interaction.
-            item_list_length (numpy.ndarray): history list length.
-
-        Returns:
-            dict: the augmented data.
-        """
-        new_length = len(item_list_index)
-        new_data = self.dataset.inter_feat[target_index]
-        new_dict = {
-            self.item_list_length_field: torch.tensor(item_list_length),
-        }
-
-        for field in self.dataset.inter_feat:
-            if field != self.uid_field:
-                list_field = getattr(self, f'{field}_list_field')
-                list_len = self.dataset.field2seqlen[list_field]
-                shape = (new_length, list_len) if isinstance(list_len, int) else (new_length,) + list_len
-                list_ftype = self.dataset.field2type[list_field]
-                dtype = torch.int64 if list_ftype in [FeatureType.TOKEN, FeatureType.TOKEN_SEQ] else torch.float64
-                new_dict[list_field] = torch.zeros(shape, dtype=dtype)
-
-                value = self.dataset.inter_feat[field]
-                for i, (index, length) in enumerate(zip(item_list_index, item_list_length)):
-                    new_dict[list_field][i][:length] = value[index]
-
-        new_data.update(Interaction(new_dict))
-        return new_data
+    pass
 
 
 class SequentialNegSampleDataLoader(NegSampleByMixin, SequentialDataLoader):
@@ -167,6 +65,7 @@ class SequentialNegSampleDataLoader(NegSampleByMixin, SequentialDataLoader):
     def __init__(
         self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False
     ):
+        self.iid_field = dataset.iid_field
         super().__init__(
             config, dataset, sampler, neg_sample_args, batch_size=batch_size, dl_format=dl_format, shuffle=shuffle
         )
@@ -253,6 +152,7 @@ class SequentialFullDataLoader(NegSampleMixin, SequentialDataLoader):
     def __init__(
         self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False
     ):
+        self.iid_field = dataset.iid_field
         super().__init__(
             config, dataset, sampler, neg_sample_args, batch_size=batch_size, dl_format=dl_format, shuffle=shuffle
         )
diff --git a/recbole/data/dataset/sequential_dataset.py b/recbole/data/dataset/sequential_dataset.py
index f56901612..5a66ea919 100644
--- a/recbole/data/dataset/sequential_dataset.py
+++ b/recbole/data/dataset/sequential_dataset.py
@@ -12,11 +12,12 @@
 ###############################
 """
 
-import copy
-
 import numpy as np
+import torch
 
 from recbole.data.dataset import Dataset
+from recbole.data.interaction import Interaction
+from recbole.utils.enum_type import FeatureType, FeatureSource
 
 
 class SequentialDataset(Dataset):
@@ -39,9 +40,44 @@ class SequentialDataset(Dataset):
 
     def __init__(self, config):
         self.augmentation = config['augmentation']
+        self.max_item_list_len = config['MAX_ITEM_LIST_LENGTH']
+        self.item_list_length_field = config['ITEM_LIST_LENGTH_FIELD']
         super().__init__(config)
 
-    def prepare_data_augmentation(self):
+    def _change_feat_format(self):
+        """Change feat format from :class:`pandas.DataFrame` to :class:`Interaction`,
+           then perform data augmentation if ``self.augmentation`` is truthy.
+        """
+        super()._change_feat_format()
+        # ``self.augmentation`` is read from ``config['augmentation']`` in ``__init__``.
+        if not self.augmentation:
+            return
+        self.logger.debug('Augmentation for sequential recommendation.')
+        self.data_augmentation()
+
+    def _aug_presets(self):  # declare the list-field metadata that data_augmentation() relies on
+        list_suffix = self.config['LIST_SUFFIX']
+        for field in self.inter_feat:
+            if field != self.uid_field:  # the user id field gets no list counterpart
+                list_field = field + list_suffix
+                setattr(self, f'{field}_list_field', list_field)  # kept as self.<field>_list_field
+                ftype = self.field2type[field]
+                # Token-typed fields (single or sequence) become token sequences; all others float sequences.
+                if ftype in [FeatureType.TOKEN, FeatureType.TOKEN_SEQ]:
+                    list_ftype = FeatureType.TOKEN_SEQ
+                else:
+                    list_ftype = FeatureType.FLOAT_SEQ
+                # A field that is already a sequence gets a 2-tuple length: (max list length, its own seqlen).
+                if ftype in [FeatureType.TOKEN_SEQ, FeatureType.FLOAT_SEQ]:
+                    list_len = (self.max_item_list_len, self.field2seqlen[field])
+                else:
+                    list_len = self.max_item_list_len
+                # Register the new list field with source INTERACTION.
+                self.set_field_property(list_field, list_ftype, FeatureSource.INTERACTION, list_len)
+        # The item-list length field is registered as a TOKEN field of length 1.
+        self.set_field_property(self.item_list_length_field, FeatureType.TOKEN, FeatureSource.INTERACTION, 1)
+
+    def data_augmentation(self):
         """Augmentation processing for sequential dataset.
 
         E.g., ``u1`` has purchase sequence ``<i1, i2, i3, i4>``,
@@ -57,14 +93,10 @@ def prepare_data_augmentation(self):
         ``u1, <i1, i2> | i3``
 
         ``u1, <i1, i2, i3> | i4``
-
-        Note:
-            Actually, we do not really generate these new item sequences.
-            One user's item sequence is stored only once in memory.
-            We store the index (slice) of each item sequence after augmentation,
-            which saves memory and accelerates a lot.
         """
-        self.logger.debug('prepare_data_augmentation')
+        self.logger.debug('data_augmentation')
+
+        self._aug_presets()
 
         self._check_field('uid_field', 'time_field')
         max_item_list_len = self.config['MAX_ITEM_LIST_LENGTH']
@@ -84,34 +116,32 @@ def prepare_data_augmentation(self):
                 target_index.append(i)
                 item_list_length.append(i - seq_start)
 
-        self.uid_list = np.array(uid_list)
-        self.item_list_index = np.array(item_list_index)
-        self.target_index = np.array(target_index)
-        self.item_list_length = np.array(item_list_length, dtype=np.int64)
-        self.mask = np.ones(len(self.inter_feat), dtype=np.bool)
-
-    def leave_one_out(self, group_by, leave_one_num=1):
-        self.logger.debug(f'Leave one out, group_by=[{group_by}], leave_one_num=[{leave_one_num}].')
-        if group_by is None:
-            raise ValueError('Leave one out strategy require a group field.')
-        if group_by != self.uid_field:
-            raise ValueError('Sequential models require group by user.')
-
-        self.prepare_data_augmentation()
-        grouped_index = self._grouped_index(self.uid_list)
-        next_index = self._split_index_by_leave_one_out(grouped_index, leave_one_num)
-
-        self._drop_unused_col()
-        next_ds = []
-        for index in next_index:
-            ds = copy.copy(self)
-            for field in ['uid_list', 'item_list_index', 'target_index', 'item_list_length']:
-                setattr(ds, field, np.array(getattr(ds, field)[index]))
-            setattr(ds, 'mask', np.ones(len(self.inter_feat), dtype=np.bool))
-            next_ds.append(ds)
-        next_ds[0].mask[self.target_index[next_index[1] + next_index[2]]] = False
-        next_ds[1].mask[self.target_index[next_index[2]]] = False
-        return next_ds
+        uid_list = np.array(uid_list)
+        item_list_index = np.array(item_list_index)
+        target_index = np.array(target_index)
+        item_list_length = np.array(item_list_length, dtype=np.int64)
+
+        new_length = len(item_list_index)
+        new_data = self.inter_feat[target_index]
+        new_dict = {
+            self.item_list_length_field: torch.tensor(item_list_length),
+        }
+
+        for field in self.inter_feat:
+            if field != self.uid_field:
+                list_field = getattr(self, f'{field}_list_field')
+                list_len = self.field2seqlen[list_field]
+                shape = (new_length, list_len) if isinstance(list_len, int) else (new_length,) + list_len
+                list_ftype = self.field2type[list_field]
+                dtype = torch.int64 if list_ftype in [FeatureType.TOKEN, FeatureType.TOKEN_SEQ] else torch.float64
+                new_dict[list_field] = torch.zeros(shape, dtype=dtype)
+
+                value = self.inter_feat[field]
+                for i, (index, length) in enumerate(zip(item_list_index, item_list_length)):
+                    new_dict[list_field][i][:length] = value[index]
+
+        new_data.update(Interaction(new_dict))
+        self.inter_feat = new_data
 
     def inter_matrix(self, form='coo', value_field=None):
         """Get sparse matrix that describe interactions between user_id and item_id.
@@ -129,35 +159,24 @@ def inter_matrix(self, form='coo', value_field=None):
         """
         if not self.uid_field or not self.iid_field:
             raise ValueError('dataset does not exist uid/iid, thus can not converted to sparse matrix.')
-
-        self.logger.warning(
-            'Load interaction matrix may lead to label leakage from testing phase, this implementation '
-            'only provides the interactions corresponding to specific phase'
-        )
-        local_inter_feat = self.inter_feat[self.mask]  # TODO: self.mask will applied to _history_matrix() in future
+        local_inter_feat = self.inter_feat
+        # TODO add items in the session of length 1
+        raise NotImplementedError()
         return self._create_sparse_matrix(local_inter_feat, self.uid_field, self.iid_field, form, value_field)
 
     def build(self):
+        """Processing dataset according to evaluation setting, including Group, Order and Split.
+        See :class:`~recbole.config.eval_setting.EvalSetting` for details.
 
-        self._change_feat_format()
+        Note:
+            ``build`` takes no arguments; the ordering strategy is read from
+            ``self.config['eval_args']['order']`` and a ``ValueError`` is raised unless it is ``'TO'``.
 
-        
+        Returns:
+            list: List of built :class:`Dataset`.
+        """
         ordering_args = self.config['eval_args']['order']
-        if ordering_args == 'RO':
-            raise ValueError('Ordering strategy `shuffle` is not supported in sequential models.')
-
-        group_by = self.config['eval_args']['group_by']
-        if group_by != 'user':
-            raise ValueError('The data splitting for Sequential models must be grouped by user.')
-            
-        split_args = self.config['eval_args']['split']
-        if split_args is None:
-            raise ValueError('The split_args in eval_args should not be None.')
-        if isinstance(split_args, dict) != True:
-            raise ValueError(f'The split_args [{split_args}] should be a dict.')
-
-        split_mode = list(split_args.keys())[0] 
-        if split_mode == 'LS':
-            return self.leave_one_out(group_by=self.config['USER_ID_FIELD'], leave_one_num=split_args['LS'])
-        else:
-            ValueError('Sequential models require `LS` (leave one out) split strategy.')
+        if ordering_args != 'TO':
+            raise ValueError(f'The ordering args for sequential recommendation has to be \'TO\'')
+
+        return super().build()

From 563043ca4b1e154c33566487300ea857ae4c4340 Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Fri, 9 Jul 2021 02:17:16 +0000
Subject: [PATCH 03/12] FIX: bugs in negative sample seq dataloader

---
 recbole/data/dataloader/sequential_dataloader.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/recbole/data/dataloader/sequential_dataloader.py b/recbole/data/dataloader/sequential_dataloader.py
index 258a1814f..ab77fd9c7 100644
--- a/recbole/data/dataloader/sequential_dataloader.py
+++ b/recbole/data/dataloader/sequential_dataloader.py
@@ -65,7 +65,9 @@ class SequentialNegSampleDataLoader(NegSampleByMixin, SequentialDataLoader):
     def __init__(
         self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False
     ):
+        self.uid_field = dataset.uid_field
         self.iid_field = dataset.iid_field
+        self.label_field = dataset.label_field
         super().__init__(
             config, dataset, sampler, neg_sample_args, batch_size=batch_size, dl_format=dl_format, shuffle=shuffle
         )
@@ -77,7 +79,7 @@ def _batch_size_adaptation(self):
         self.upgrade_batch_size(new_batch_size)
 
     def _next_batch_data(self):
-        cur_data = self._get_processed_data(slice(self.pr, self.pr + self.step))
+        cur_data = self.dataset[self.pr:self.pr + self.step]
         cur_data = self._neg_sampling(cur_data)
         self.pr += self.step
 

From 09e967a6d5f3014a4ce1019645a6390914088163 Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Fri, 9 Jul 2021 02:17:39 +0000
Subject: [PATCH 04/12] FIX: comments for seq dataset

---
 recbole/data/dataset/sequential_dataset.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/recbole/data/dataset/sequential_dataset.py b/recbole/data/dataset/sequential_dataset.py
index 5a66ea919..4d3e6a7c2 100644
--- a/recbole/data/dataset/sequential_dataset.py
+++ b/recbole/data/dataset/sequential_dataset.py
@@ -3,7 +3,7 @@
 # @Email  : chenyushuo@ruc.edu.cn
 
 # UPDATE:
-# @Time   : 2020/9/16, 2021/7/1, 2021/7/8
+# @Time   : 2020/9/16, 2021/7/1, 2021/7/9
 # @Author : Yushuo Chen, Xingyu Pan, Yupeng Hou
 # @Email  : chenyushuo@ruc.edu.cn, xy_pan@foxmail.com, houyupeng@ruc.edu.cn
 
@@ -27,15 +27,8 @@ class SequentialDataset(Dataset):
 
     Attributes:
         augmentation (bool): Whether the interactions should be augmented in RecBole.
-
-        uid_list (numpy.ndarray): List of user id after augmentation.
-
-        item_list_index (numpy.ndarray): List of indexes of item sequence after augmentation.
-
-        target_index (numpy.ndarray): List of indexes of target item id after augmentation.
-
-        item_list_length (numpy.ndarray): List of item sequences' length after augmentation.
-
+        max_item_list_len (int): Max length of historical item list.
+        item_list_length_field (str): Field name for item lists' length.
     """
 
     def __init__(self, config):

From d097d4e85a06a16c08a1f29753689037061dcd65 Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Fri, 9 Jul 2021 02:49:14 +0000
Subject: [PATCH 05/12] REFACTOR: DIEN & DIN's dataset & dataloader

---
 recbole/data/dataloader/__init__.py        |   1 -
 recbole/data/dataloader/dien_dataloader.py | 146 ---------------------
 recbole/data/dataset/customized_dataset.py | 110 +++++++++++++++-
 recbole/data/utils.py                      |  44 +------
 4 files changed, 110 insertions(+), 191 deletions(-)
 delete mode 100644 recbole/data/dataloader/dien_dataloader.py

diff --git a/recbole/data/dataloader/__init__.py b/recbole/data/dataloader/__init__.py
index dce69e125..b8b2e911f 100644
--- a/recbole/data/dataloader/__init__.py
+++ b/recbole/data/dataloader/__init__.py
@@ -3,7 +3,6 @@
 from recbole.data.dataloader.general_dataloader import *
 from recbole.data.dataloader.context_dataloader import *
 from recbole.data.dataloader.sequential_dataloader import *
-from recbole.data.dataloader.dien_dataloader import *
 from recbole.data.dataloader.knowledge_dataloader import *
 from recbole.data.dataloader.decisiontree_dataloader import *
 from recbole.data.dataloader.user_dataloader import *
diff --git a/recbole/data/dataloader/dien_dataloader.py b/recbole/data/dataloader/dien_dataloader.py
deleted file mode 100644
index 6fb06b321..000000000
--- a/recbole/data/dataloader/dien_dataloader.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# @Time   : 2021/2/25
-# @Author : Zhichao Feng
-# @Email  : fzcbupt@gmail.com
-
-# UPDATE
-# @Time   : 2021/3/19
-# @Author : Zhichao Feng
-# @email  : fzcbupt@gmail.com
-
-"""
-recbole.data.dataloader.dien_dataloader
-################################################
-"""
-
-import torch
-
-from recbole.data.dataloader.sequential_dataloader import SequentialDataLoader, SequentialNegSampleDataLoader, SequentialFullDataLoader
-from recbole.data.interaction import Interaction, cat_interactions
-from recbole.utils import DataLoaderType, FeatureSource, FeatureType, InputType
-from recbole.sampler import SeqSampler
-
-
-class DIENDataLoader(SequentialDataLoader):
-    """:class:`DIENDataLoader` is used for DIEN model. It is different from :class:`SequentialDataLoader` in
-    `augmentation`. It add users' negative item list to interaction.
-    It will do data augmentation for the origin data. And its returned data contains the following:
-
-        - user id
-        - history items list
-        - history negative item list
-        - history items' interaction time list
-        - item to be predicted
-        - the interaction time of item to be predicted
-        - history list length
-        - other interaction information of item to be predicted
-
-    Args:
-        config (Config): The config of dataloader.
-        dataset (Dataset): The dataset of dataloader.
-        batch_size (int, optional): The batch_size of dataloader. Defaults to ``1``.
-        dl_format (InputType, optional): The input type of dataloader. Defaults to
-            :obj:`~recbole.utils.enum_type.InputType.POINTWISE`.
-        shuffle (bool, optional): Whether the dataloader will be shuffle after a round. Defaults to ``False``.
-    """
-    dl_type = DataLoaderType.ORIGIN
-
-    def __init__(self, config, dataset, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False):
-
-        list_suffix = config['LIST_SUFFIX']
-        neg_prefix = config['NEG_PREFIX']
-
-        self.seq_sampler = SeqSampler(dataset)
-        self.iid_field = dataset.iid_field
-        self.neg_item_list_field = neg_prefix + self.iid_field + list_suffix
-        self.neg_item_list = self.seq_sampler.sample_neg_sequence(dataset.inter_feat[self.iid_field])
-
-        super().__init__(config, dataset, batch_size=batch_size, dl_format=dl_format, shuffle=shuffle)
-
-    def augmentation(self, item_list_index, target_index, item_list_length):
-        """Data augmentation.
-
-        Args:
-            item_list_index (numpy.ndarray): the index of history items list in interaction.
-            target_index (numpy.ndarray): the index of items to be predicted in interaction.
-            item_list_length (numpy.ndarray): history list length.
-
-        Returns:
-            dict: the augmented data.
-        """
-        new_length = len(item_list_index)
-        new_data = self.dataset.inter_feat[target_index]
-        new_dict = {
-            self.item_list_length_field: torch.tensor(item_list_length),
-        }
-
-        for field in self.dataset.inter_feat:
-            if field != self.uid_field:
-                list_field = getattr(self, f'{field}_list_field')
-                list_len = self.dataset.field2seqlen[list_field]
-                shape = (new_length, list_len) if isinstance(list_len, int) else (new_length,) + list_len
-                list_ftype = self.dataset.field2type[list_field]
-                dtype = torch.int64 if list_ftype in [FeatureType.TOKEN, FeatureType.TOKEN_SEQ] else torch.float64
-                new_dict[list_field] = torch.zeros(shape, dtype=dtype)
-
-                value = self.dataset.inter_feat[field]
-                for i, (index, length) in enumerate(zip(item_list_index, item_list_length)):
-                    new_dict[list_field][i][:length] = value[index]
-
-                if field == self.iid_field:
-                    new_dict[self.neg_item_list_field] = torch.zeros(shape, dtype=dtype)
-                    for i, (index, length) in enumerate(zip(item_list_index, item_list_length)):
-                        new_dict[self.neg_item_list_field][i][:length] = self.neg_item_list[index]
-
-        new_data.update(Interaction(new_dict))
-        return new_data
-
-
-class DIENNegSampleDataLoader(SequentialNegSampleDataLoader, DIENDataLoader):
-    """:class:`DIENNegSampleDataLoader` is sequential-dataloader with negative sampling for DIEN.
-    Like :class:`~recbole.data.dataloader.general_dataloader.GeneralNegSampleDataLoader`, for the result of every batch,
-    we permit that every positive interaction and its negative interaction must be in the same batch. Beside this,
-    when it is in the evaluation stage, and evaluator is topk-like function, we also permit that all the interactions
-    corresponding to each user are in the same batch and positive interactions are before negative interactions.
-
-    Args:
-        config (Config): The config of dataloader.
-        dataset (Dataset): The dataset of dataloader.
-        sampler (Sampler): The sampler of dataloader.
-        neg_sample_args (dict): The neg_sample_args of dataloader.
-        batch_size (int, optional): The batch_size of dataloader. Defaults to ``1``.
-        dl_format (InputType, optional): The input type of dataloader. Defaults to
-            :obj:`~recbole.utils.enum_type.InputType.POINTWISE`.
-        shuffle (bool, optional): Whether the dataloader will be shuffle after a round. Defaults to ``False``.
-    """
-
-    def __init__(
-        self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False
-    ):
-        super().__init__(
-            config, dataset, sampler, neg_sample_args, batch_size=batch_size, dl_format=dl_format, shuffle=shuffle
-        )
-
-
-class DIENFullDataLoader(SequentialFullDataLoader, DIENDataLoader):
-    """:class:`DIENFullDataLoader` is a sequential-dataloader with full sort for DIEN. In order to speed up calculation,
-    this dataloader would only return then user part of interactions, positive items and used items.
-    It would not return negative items.
-
-    Args:
-        config (Config): The config of dataloader.
-        dataset (Dataset): The dataset of dataloader.
-        sampler (Sampler): The sampler of dataloader.
-        neg_sample_args (dict): The neg_sample_args of dataloader.
-        batch_size (int, optional): The batch_size of dataloader. Defaults to ``1``.
-        dl_format (InputType, optional): The input type of dataloader. Defaults to
-            :obj:`~recbole.utils.enum_type.InputType.POINTWISE`.
-        shuffle (bool, optional): Whether the dataloader will be shuffle after a round. Defaults to ``False``.
-    """
-    dl_type = DataLoaderType.FULL
-
-    def __init__(
-        self, config, dataset, sampler, neg_sample_args, batch_size=1, dl_format=InputType.POINTWISE, shuffle=False
-    ):
-        super().__init__(
-            config, dataset, sampler, neg_sample_args, batch_size=batch_size, dl_format=dl_format, shuffle=shuffle
-        )
diff --git a/recbole/data/dataset/customized_dataset.py b/recbole/data/dataset/customized_dataset.py
index d1e34bab2..3dd7f0546 100644
--- a/recbole/data/dataset/customized_dataset.py
+++ b/recbole/data/dataset/customized_dataset.py
@@ -2,6 +2,11 @@
 # @Author : Yupeng Hou
 # @Email  : houyupeng@ruc.edu.cn
 
+# UPDATE
+# @Time   : 2021/7/9
+# @Author : Yupeng Hou
+# @Email  : houyupeng@ruc.edu.cn
+
 """
 recbole.data.customized_dataset
 ##################################
@@ -11,7 +16,13 @@
 Customized datasets named ``[Model Name]Dataset`` can be automatically called.
 """
 
-from recbole.data.dataset import Kg_Seq_Dataset
+import numpy as np
+import torch
+
+from recbole.data.dataset import Kg_Seq_Dataset, SequentialDataset
+from recbole.data.interaction import Interaction
+from recbole.sampler import SeqSampler
+from recbole.utils.enum_type import FeatureType
 
 
 class GRU4RecKGDataset(Kg_Seq_Dataset):
@@ -24,3 +35,100 @@ class KSRDataset(Kg_Seq_Dataset):
 
     def __init__(self, config):
         super().__init__(config)
+
+
+class DIENDataset(SequentialDataset):
+    """:class:`DIENDataset` is based on :class:`~recbole.data.dataset.sequential_dataset.SequentialDataset`.
+    It is different from :class:`SequentialDataset` in `data_augmentation`.
+    It adds each user's negative item list to the interaction.
+
+    The original version of sampling the negative item list was implemented by Zhichao Feng (fzcbupt@gmail.com) on
+    2021/2/25, and he updated the code on 2021/3/19. On 2021/7/9, Yupeng refactored SequentialDataset &
+    SequentialDataLoader, and then refactored DIENDataset as well.
+
+    Attributes:
+        augmentation (bool): Whether the interactions should be augmented in RecBole.
+        seq_sampler (recbole.sampler.SeqSampler): A sampler used to sample negative item sequence.
+        neg_item_list_field (str): Field name for negative item sequence.
+        neg_item_list (torch.tensor): All users' negative item history sequences.
+    """
+    def __init__(self, config):
+        super().__init__(config)
+
+        list_suffix = config['LIST_SUFFIX']
+        neg_prefix = config['NEG_PREFIX']
+        self.seq_sampler = SeqSampler(self)
+        self.neg_item_list_field = neg_prefix + self.iid_field + list_suffix
+        self.neg_item_list = self.seq_sampler.sample_neg_sequence(self.inter_feat[self.iid_field])
+
+    def data_augmentation(self):
+        """Augmentation processing for sequential dataset.
+
+        E.g., ``u1`` has purchase sequence ``<i1, i2, i3, i4>``,
+        then after augmentation, we will generate three cases.
+
+        ``u1, <i1> | i2``
+
+        (Which means given user_id ``u1`` and item_seq ``<i1>``,
+        we need to predict the next item ``i2``.)
+
+        The other cases are below:
+
+        ``u1, <i1, i2> | i3``
+
+        ``u1, <i1, i2, i3> | i4``
+        """
+        self.logger.debug('data_augmentation')
+
+        self._aug_presets()
+
+        self._check_field('uid_field', 'time_field')
+        max_item_list_len = self.config['MAX_ITEM_LIST_LENGTH']
+        self.sort(by=[self.uid_field, self.time_field], ascending=True)
+        last_uid = None
+        uid_list, item_list_index, target_index, item_list_length = [], [], [], []
+        seq_start = 0
+        for i, uid in enumerate(self.inter_feat[self.uid_field].numpy()):
+            if last_uid != uid:
+                last_uid = uid
+                seq_start = i
+            else:
+                if i - seq_start > max_item_list_len:
+                    seq_start += 1
+                uid_list.append(uid)
+                item_list_index.append(slice(seq_start, i))
+                target_index.append(i)
+                item_list_length.append(i - seq_start)
+
+        uid_list = np.array(uid_list)
+        item_list_index = np.array(item_list_index)
+        target_index = np.array(target_index)
+        item_list_length = np.array(item_list_length, dtype=np.int64)
+
+        new_length = len(item_list_index)
+        new_data = self.inter_feat[target_index]
+        new_dict = {
+            self.item_list_length_field: torch.tensor(item_list_length),
+        }
+
+        for field in self.inter_feat:
+            if field != self.uid_field:
+                list_field = getattr(self, f'{field}_list_field')
+                list_len = self.field2seqlen[list_field]
+                shape = (new_length, list_len) if isinstance(list_len, int) else (new_length,) + list_len
+                list_ftype = self.field2type[list_field]
+                dtype = torch.int64 if list_ftype in [FeatureType.TOKEN, FeatureType.TOKEN_SEQ] else torch.float64
+                new_dict[list_field] = torch.zeros(shape, dtype=dtype)
+
+                value = self.inter_feat[field]
+                for i, (index, length) in enumerate(zip(item_list_index, item_list_length)):
+                    new_dict[list_field][i][:length] = value[index]
+
+                # DIEN: also build the negative item list, filled from the pre-sampled ``self.neg_item_list``
+                if field == self.iid_field:
+                    new_dict[self.neg_item_list_field] = torch.zeros(shape, dtype=dtype)
+                    for i, (index, length) in enumerate(zip(item_list_index, item_list_length)):
+                        new_dict[self.neg_item_list_field][i][:length] = self.neg_item_list[index]
+
+        new_data.update(Interaction(new_dict))
+        self.inter_feat = new_data
diff --git a/recbole/data/utils.py b/recbole/data/utils.py
index 56463a0b1..124a4a737 100644
--- a/recbole/data/utils.py
+++ b/recbole/data/utils.py
@@ -3,7 +3,7 @@
 # @Email  : houyupeng@ruc.edu.cn
 
 # UPDATE:
-# @Time   : 2020/10/19, 2020/9/17, 2020/8/31, 2021/2/20, 2021/3/1
+# @Time   : 2021/7/9, 2020/9/17, 2020/8/31, 2021/2/20, 2021/3/1
 # @Author : Yupeng Hou, Yushuo Chen, Kaiyuan Li, Haoran Cheng, Jiawei Guan
 # @Email  : houyupeng@ruc.edu.cn, chenyushuo@ruc.edu.cn, tsotfsk@outlook.com, chenghaoran29@foxmail.com, guanjw@ruc.edu.cn
 
@@ -225,8 +225,6 @@ def get_data_loader(name, config, neg_sample_args):
         type: The dataloader class that meets the requirements in :attr:`config` and :attr:`eval_setting`.
     """
     register_table = {
-        'DIN': _get_DIN_data_loader,
-        'DIEN': _get_DIEN_data_loader,
         "MultiDAE": _get_AE_data_loader,
         "MultiVAE": _get_AE_data_loader,
         'MacridVAE': _get_AE_data_loader,
@@ -274,46 +272,6 @@ def get_data_loader(name, config, neg_sample_args):
         raise NotImplementedError(f'Model_type [{model_type}] has not been implemented.')
 
 
-def _get_DIN_data_loader(name, config, neg_sample_args):
-    """Customized function for DIN to get correct dataloader class.
-
-    Args:
-        name (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'.
-        config (Config): An instance object of Config, used to record parameter information.
-        neg_sample_args : Settings of negative sampling.
-
-    Returns:
-        type: The dataloader class that meets the requirements in :attr:`config` and :attr:`eval_setting`.
-    """
-    neg_sample_strategy = neg_sample_args['strategy']
-    if neg_sample_strategy == 'none':
-        return SequentialDataLoader
-    elif neg_sample_strategy == 'by':
-        return SequentialNegSampleDataLoader
-    elif neg_sample_strategy == 'full':
-        return SequentialFullDataLoader
-
-
-def _get_DIEN_data_loader(name, config, neg_sample_args):
-    """Customized function for DIEN to get correct dataloader class.
-
-    Args:
-        name (str): The stage of dataloader. It can only take two values: 'train' or 'evaluation'.
-        config (Config): An instance object of Config, used to record parameter information.
-        neg_sample_args : Settings of negative sampling.
-
-    Returns:
-        type: The dataloader class that meets the requirements in :attr:`config` and :attr:`eval_setting`.
-    """
-    neg_sample_strategy = neg_sample_args['strategy']
-    if neg_sample_strategy == 'none':
-        return DIENDataLoader
-    elif neg_sample_strategy == 'by':
-        return DIENNegSampleDataLoader
-    elif neg_sample_strategy == 'full':
-        return DIENFullDataLoader
-
-
 def _get_AE_data_loader(name, config, neg_sample_args):
     """Customized function for Multi-DAE and Multi-VAE to get correct dataloader class.
 

From 9c2cbae2af5c1770091b28974d8cd57b9ea3f502 Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Fri, 9 Jul 2021 06:10:40 +0000
Subject: [PATCH 06/12] FEA: inter_matrix for seq dataset

---
 recbole/data/dataset/sequential_dataset.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/recbole/data/dataset/sequential_dataset.py b/recbole/data/dataset/sequential_dataset.py
index 4d3e6a7c2..1f03dff3f 100644
--- a/recbole/data/dataset/sequential_dataset.py
+++ b/recbole/data/dataset/sequential_dataset.py
@@ -152,9 +152,19 @@ def inter_matrix(self, form='coo', value_field=None):
         """
         if not self.uid_field or not self.iid_field:
             raise ValueError('dataset does not exist uid/iid, thus can not converted to sparse matrix.')
-        local_inter_feat = self.inter_feat
-        # TODO add items in the session of length 1
-        raise NotImplementedError()
+
+        l1_idx = (self.inter_feat[self.item_list_length_field] == 1)
+        l1_inter_dict = self.inter_feat[l1_idx].interaction
+        new_dict = {}
+        list_suffix = self.config['LIST_SUFFIX']
+        candidate_field_set = set()
+        for field in l1_inter_dict:
+            if field != self.uid_field and field + list_suffix in l1_inter_dict:
+                candidate_field_set.add(field)
+                new_dict[field] = torch.cat([self.inter_feat[field], l1_inter_dict[field + list_suffix][:,0]])
+            elif (not field.endswith(list_suffix)) and (field != self.item_list_length_field):
+                new_dict[field] = torch.cat([self.inter_feat[field], l1_inter_dict[field]])
+        local_inter_feat = Interaction(new_dict)
         return self._create_sparse_matrix(local_inter_feat, self.uid_field, self.iid_field, form, value_field)
 
     def build(self):

From 894fd33a4e6eec31b1a8852fb2d794194d7da52d Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Fri, 9 Jul 2021 06:10:54 +0000
Subject: [PATCH 07/12] FIX: test for seq loo

---
 tests/data/test_dataset.py | 59 +++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 23 deletions(-)

diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py
index 2844c97e5..65512d45d 100644
--- a/tests/data/test_dataset.py
+++ b/tests/data/test_dataset.py
@@ -564,21 +564,34 @@ def test_seq_leave_one_out(self):
             'training_neg_sample_num': 0
         }
         train_dataset, valid_dataset, test_dataset = split_dataset(config_dict=config_dict)
-        assert (train_dataset.uid_list == [1, 1, 1, 1, 1, 2, 2, 3, 4]).all()
-        assert (train_dataset.item_list_index == [slice(0, 1), slice(0, 2), slice(0, 3), slice(0, 4), slice(0, 5),
-                                                  slice(8, 9), slice(8, 10), slice(13, 14), slice(16, 17)]).all()
-        assert (train_dataset.target_index == [1, 2, 3, 4, 5, 9, 10, 14, 17]).all()
-        assert (train_dataset.item_list_length == [1, 2, 3, 4, 5, 1, 2, 1, 1]).all()
-
-        assert (valid_dataset.uid_list == [1, 2]).all()
-        assert (valid_dataset.item_list_index == [slice(0, 6), slice(8, 11)]).all()
-        assert (valid_dataset.target_index == [6, 11]).all()
-        assert (valid_dataset.item_list_length == [6, 3]).all()
-
-        assert (test_dataset.uid_list == [1, 2, 3]).all()
-        assert (test_dataset.item_list_index == [slice(0, 7), slice(8, 12), slice(13, 15)]).all()
-        assert (test_dataset.target_index == [7, 12, 15]).all()
-        assert (test_dataset.item_list_length == [7, 4, 2]).all()
+        assert (train_dataset.inter_feat[train_dataset.uid_field].numpy() == [1, 1, 1, 1, 1, 4, 2, 2, 3]).all()
+        assert (train_dataset.inter_feat[train_dataset.item_id_list_field][:,:5].numpy() == [
+            [1, 0, 0, 0, 0],
+            [1, 2, 0, 0, 0],
+            [1, 2, 3, 0, 0],
+            [1, 2, 3, 4, 0],
+            [1, 2, 3, 4, 5],
+            [3, 0, 0, 0, 0],
+            [4, 0, 0, 0, 0],
+            [4, 5, 0, 0, 0],
+            [4, 0, 0, 0, 0]]).all()
+        assert (train_dataset.inter_feat[train_dataset.iid_field].numpy() == [2, 3, 4, 5, 6, 4, 5, 6, 5]).all()
+        assert (train_dataset.inter_feat[train_dataset.item_list_length_field].numpy() == [1, 2, 3, 4, 5, 1, 1, 2, 1]).all()
+
+        assert (valid_dataset.inter_feat[valid_dataset.uid_field].numpy() == [1, 2]).all()
+        assert (valid_dataset.inter_feat[valid_dataset.item_id_list_field][:,:6].numpy() == [
+            [1, 2, 3, 4, 5, 6],
+            [4, 5, 6, 0, 0, 0]]).all()
+        assert (valid_dataset.inter_feat[valid_dataset.iid_field].numpy() == [7, 7]).all()
+        assert (valid_dataset.inter_feat[valid_dataset.item_list_length_field].numpy() == [6, 3]).all()
+
+        assert (test_dataset.inter_feat[test_dataset.uid_field].numpy() == [1, 2, 3]).all()
+        assert (test_dataset.inter_feat[test_dataset.item_id_list_field][:,:7].numpy() == [
+            [1, 2, 3, 4, 5, 6, 7],
+            [4, 5, 6, 7, 0, 0, 0],
+            [4, 5, 0, 0, 0, 0, 0]]).all()
+        assert (test_dataset.inter_feat[test_dataset.iid_field].numpy() == [8, 8, 6]).all()
+        assert (test_dataset.inter_feat[test_dataset.item_list_length_field].numpy() == [7, 4, 2]).all()
 
         assert (train_dataset.inter_matrix().toarray() == [
             [0., 0., 0., 0., 0., 0., 0., 0., 0.],
@@ -589,17 +602,17 @@ def test_seq_leave_one_out(self):
         ]).all()
         assert (valid_dataset.inter_matrix().toarray() == [
             [0., 0., 0., 0., 0., 0., 0., 0., 0.],
-            [0., 1., 1., 1., 1., 1., 1., 1., 0.],
-            [0., 0., 0., 0., 1., 1., 1., 1., 0.],
-            [0., 0., 0., 0., 1., 1., 0., 0., 0.],
-            [0., 0., 0., 1., 1., 0., 0., 0., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 1., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 1., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 0., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 0., 0.]
         ]).all()
         assert (test_dataset.inter_matrix().toarray() == [
             [0., 0., 0., 0., 0., 0., 0., 0., 0.],
-            [0., 1., 1., 1., 1., 1., 1., 1., 1.],
-            [0., 0., 0., 0., 1., 1., 1., 1., 1.],
-            [0., 0., 0., 0., 1., 1., 1., 0., 0.],
-            [0., 0., 0., 1., 1., 0., 0., 0., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 0., 1.],
+            [0., 0., 0., 0., 0., 0., 0., 0., 1.],
+            [0., 0., 0., 0., 0., 0., 1., 0., 0.],
+            [0., 0., 0., 0., 0., 0., 0., 0., 0.]
         ]).all()
 
 

From dc4fc149b0e092acf44a3f5cd3704332e43c7a74 Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Fri, 9 Jul 2021 06:15:36 +0000
Subject: [PATCH 08/12] FIX: update meta-data for test_dataset.py

---
 tests/data/test_dataset.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py
index 65512d45d..1ddaeb1c4 100644
--- a/tests/data/test_dataset.py
+++ b/tests/data/test_dataset.py
@@ -4,9 +4,9 @@
 # @Email  : chenyushuo@ruc.edu.cn
 
 # UPDATE
-# @Time    :   2020/1/3, 2021/7/1
-# @Author  :   Yushuo Chen, Xingyu Pan
-# @email   :   chenyushuo@ruc.edu.cn, xy_pan@foxmail.com
+# @Time    :   2020/1/3, 2021/7/1, 2021/7/9
+# @Author  :   Yushuo Chen, Xingyu Pan, Yupeng Hou
+# @email   :   chenyushuo@ruc.edu.cn, xy_pan@foxmail.com, houyupeng@ruc.edu.cn
 
 import logging
 import os

From 27d5bfd12125b22b0634cca2c2029c6c1886f560 Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Fri, 9 Jul 2021 18:18:18 +0000
Subject: [PATCH 09/12] FIX: remove arg 'augmentation'

---
 recbole/config/configurator.py                           | 9 ---------
 recbole/data/dataset/sequential_dataset.py               | 6 ++----
 recbole/properties/quick_start_config/sequential.yaml    | 1 -
 .../properties/quick_start_config/sequential_DIN.yaml    | 1 -
 4 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/recbole/config/configurator.py b/recbole/config/configurator.py
index 70b84b0b2..357648162 100644
--- a/recbole/config/configurator.py
+++ b/recbole/config/configurator.py
@@ -284,15 +284,6 @@ def _set_default_parameters(self):
         else:
             raise ValueError('Either Model has attr \'input_type\',' 'or arg \'loss_type\' should exist in config.')
 
-        if self.final_config_dict['MODEL_TYPE'] == ModelType.SEQUENTIAL and \
-           self.final_config_dict['benchmark_filename'] is not None and \
-           self.final_config_dict['augmentation']:
-            raise ValueError(
-                f'Benchmark datasets for sequential model {self.model} '
-                f'should be augmented in advance, which should not be augmented again and '
-                f'config \'augmentation\' should be False.'
-            )
-
         eval_type = None
         for metric in self.final_config_dict['metrics']:
             if metric.lower() in individual_metrics:
diff --git a/recbole/data/dataset/sequential_dataset.py b/recbole/data/dataset/sequential_dataset.py
index 1f03dff3f..8ce84b70a 100644
--- a/recbole/data/dataset/sequential_dataset.py
+++ b/recbole/data/dataset/sequential_dataset.py
@@ -26,13 +26,11 @@ class SequentialDataset(Dataset):
     which can accelerate the data loader.
 
     Attributes:
-        augmentation (bool): Whether the interactions should be augmented in RecBole.
         max_item_list_len (int): Max length of historical item list.
         item_list_length_field (str): Field name for item lists' length.
     """
 
     def __init__(self, config):
-        self.augmentation = config['augmentation']
         self.max_item_list_len = config['MAX_ITEM_LIST_LENGTH']
         self.item_list_length_field = config['ITEM_LIST_LENGTH_FIELD']
         super().__init__(config)
@@ -42,8 +40,8 @@ def _change_feat_format(self):
            then perform data augmentation.
         """
         super()._change_feat_format()
-        
-        if not self.augmentation:
+
+        if self.config['benchmark_filename'] is not None:
             return
         self.logger.debug('Augmentation for sequential recommendation.')
         self.data_augmentation()
diff --git a/recbole/properties/quick_start_config/sequential.yaml b/recbole/properties/quick_start_config/sequential.yaml
index 2370f18b4..d9e144449 100644
--- a/recbole/properties/quick_start_config/sequential.yaml
+++ b/recbole/properties/quick_start_config/sequential.yaml
@@ -2,4 +2,3 @@ eval_args:
   split: {'LS': 2}
   order: TO
   mode: full
-augmentation: True
diff --git a/recbole/properties/quick_start_config/sequential_DIN.yaml b/recbole/properties/quick_start_config/sequential_DIN.yaml
index 3e7f4a681..8d6b01edc 100644
--- a/recbole/properties/quick_start_config/sequential_DIN.yaml
+++ b/recbole/properties/quick_start_config/sequential_DIN.yaml
@@ -4,4 +4,3 @@ eval_args:
   mode: uni100
 metrics: ['AUC', 'LogLoss']
 valid_metric: AUC
-augmentation: True
\ No newline at end of file

From d00852d081e2b7378794c0776c88a66d1316cb5d Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Sat, 10 Jul 2021 02:19:57 +0800
Subject: [PATCH 10/12] Update configurator.py

---
 recbole/config/configurator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recbole/config/configurator.py b/recbole/config/configurator.py
index 357648162..f2e15b21d 100644
--- a/recbole/config/configurator.py
+++ b/recbole/config/configurator.py
@@ -3,7 +3,7 @@
 # @Email  : linzihan.super@foxmail.com
 
 # UPDATE
-# @Time   : 2020/10/04, 2021/7/8, 2021/2/17, 2021/6/30
+# @Time   : 2020/10/04, 2021/3/2, 2021/2/17, 2021/6/30
 # @Author : Shanlei Mu, Yupeng Hou, Jiawei Guan, Xingyu Pan
 # @Email  : slmu@ruc.edu.cn, houyupeng@ruc.edu.cn, Guanjw@ruc.edu.cn, xy_pan@foxmail.com
 

From 497327dbe10b3d25bb61dfbeebee2381b7255c4b Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Sat, 10 Jul 2021 03:22:23 +0000
Subject: [PATCH 11/12] FIX: remove arg augmentation

---
 recbole/properties/dataset/sample.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/recbole/properties/dataset/sample.yaml b/recbole/properties/dataset/sample.yaml
index ff4ac4915..d9869e5e2 100644
--- a/recbole/properties/dataset/sample.yaml
+++ b/recbole/properties/dataset/sample.yaml
@@ -43,7 +43,6 @@ normalize_field: ~
 normalize_all: ~
 
 # Sequential Model Needed
-augmentation: False
 ITEM_LIST_LENGTH_FIELD: item_length
 LIST_SUFFIX: _list
 MAX_ITEM_LIST_LENGTH: 50

From 99c8d591735e549ac2ffa09aba46c569266a9d70 Mon Sep 17 00:00:00 2001
From: Yupeng Hou <houyupeng@ruc.edu.cn>
Date: Sat, 10 Jul 2021 11:25:11 +0800
Subject: [PATCH 12/12] Update argument_list.py

---
 recbole/utils/argument_list.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/recbole/utils/argument_list.py b/recbole/utils/argument_list.py
index 31b719989..632f442bd 100644
--- a/recbole/utils/argument_list.py
+++ b/recbole/utils/argument_list.py
@@ -45,7 +45,6 @@
     'ITEM_LIST_LENGTH_FIELD', 'LIST_SUFFIX', 'MAX_ITEM_LIST_LENGTH', 'POSITION_FIELD',
     'HEAD_ENTITY_ID_FIELD', 'TAIL_ENTITY_ID_FIELD', 'RELATION_ID_FIELD', 'ENTITY_ID_FIELD',
     'load_col', 'unload_col', 'unused_col', 'additional_feat_suffix',
-    'augmentation',
     'max_user_inter_num', 'min_user_inter_num', 'max_item_inter_num', 'min_item_inter_num',
     'lowest_val', 'highest_val', 'equal_val', 'not_equal_val',
     'fields_in_same_space',