From ced587c3674bde989a414bea47d6d88542a707fe Mon Sep 17 00:00:00 2001 From: Yupeng Hou Date: Tue, 13 Jul 2021 12:22:37 +0000 Subject: [PATCH] FIX: bugs of seqlen for benchmarks --- recbole/data/dataset/dataset.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/recbole/data/dataset/dataset.py b/recbole/data/dataset/dataset.py index 63b7751cc..7d5053c1b 100644 --- a/recbole/data/dataset/dataset.py +++ b/recbole/data/dataset/dataset.py @@ -3,7 +3,7 @@ # @Email : houyupeng@ruc.edu.cn # UPDATE: -# @Time : 2021/7/11 2021/7/1, 2020/11/10 +# @Time : 2021/7/13 2021/7/1, 2020/11/10 # @Author : Yupeng Hou, Xingyu Pan, Yushuo Chen # @Email : houyupeng@ruc.edu.cn, xy_pan@foxmail.com, chenyushuo@ruc.edu.cn @@ -16,7 +16,7 @@ import pickle import os import yaml -from collections import Counter +from collections import Counter, defaultdict from logging import getLogger import numpy as np @@ -272,16 +272,21 @@ def _load_inter_feat(self, token, dataset_path): else: sub_inter_lens = [] sub_inter_feats = [] + overall_field2seqlen = defaultdict(int) for filename in self.benchmark_filename_list: file_path = os.path.join(dataset_path, f'{token}.{filename}.inter') if os.path.isfile(file_path): temp = self._load_feat(file_path, FeatureSource.INTERACTION) sub_inter_feats.append(temp) sub_inter_lens.append(len(temp)) + for field in self.field2seqlen: + overall_field2seqlen[field] = max( + overall_field2seqlen[field], self.field2seqlen[field]) else: raise ValueError(f'File {file_path} not exist.') inter_feat = pd.concat(sub_inter_feats) self.inter_feat, self.file_size_list = inter_feat, sub_inter_lens + self.field2seqlen = overall_field2seqlen def _load_user_or_item_feat(self, token, dataset_path, source, field_name): """Load user/item features.