Skip to content

Commit

Permalink
Merge pull request #875 from ChangxinTian/data
Browse files Browse the repository at this point in the history
Add tensorboard and improve logger
  • Loading branch information
2017pxy authored Jul 11, 2021
2 parents c02ab8a + 2f449e7 commit 1090915
Show file tree
Hide file tree
Showing 17 changed files with 92 additions and 75 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
.idea/
*.pyc
*.log
log_tensorboard/*
saved/
*.lprof
*.egg-info/
Expand Down
4 changes: 2 additions & 2 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,23 @@ requirements:
- pandas >=1.0.5
- tqdm >=4.48.2
- pyyaml >=5.1.0
- matplotlib >=3.1.3
- scikit-learn >=0.23.2
- pytorch
- colorlog==4.7.2
- colorama==0.4.4
- tensorboard >=2.5.0
run:
- python
- numpy >=1.17.2
- scipy ==1.6.0
- pandas >=1.0.5
- tqdm >=4.48.2
- pyyaml >=5.1.0
- matplotlib >=3.1.3
- scikit-learn >=0.23.2
- pytorch
- colorlog==4.7.2
- colorama==0.4.4
- tensorboard >=2.5.0
test:
imports:
- recbole
Expand Down
1 change: 0 additions & 1 deletion docs/source/user_guide/config_settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ model training and evaluation.
which will clip the gradient norm of the model. Defaults to ``None``.
- ``loss_decimal_place(int)``: The decimal place of training loss. Defaults to ``4``.
- ``weight_decay (float)`` : Weight decay (L2 penalty), used for `optimizer <https://pytorch.org/docs/stable/optim.html?highlight=weight_decay>`_. Defaults to ``0.0``.
- ``draw_loss_pic (bool)``: If ``True``, draw the training loss line graph of the model; the picture is a PDF file that will be saved in your run directory after model training. Defaults to ``False``.


**Evaluation Setting**
Expand Down
3 changes: 1 addition & 2 deletions recbole/config/configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@

from recbole.evaluator import group_metrics, individual_metrics
from recbole.utils import get_model, Enum, EvaluatorType, ModelType, InputType, \
general_arguments, training_arguments, evaluation_arguments, dataset_arguments
from recbole.utils.utils import set_color
general_arguments, training_arguments, evaluation_arguments, dataset_arguments, set_color


class Config(object):
Expand Down
3 changes: 1 addition & 2 deletions recbole/data/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@
from scipy.sparse import coo_matrix

from recbole.data.interaction import Interaction
from recbole.utils import FeatureSource, FeatureType, get_local_time
from recbole.utils.utils import set_color
from recbole.utils import FeatureSource, FeatureType, get_local_time, set_color
from recbole.utils.url import decide_download, download_url, extract_zip, makedirs, rename_atomic_files


Expand Down
3 changes: 1 addition & 2 deletions recbole/data/dataset/kg_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
from scipy.sparse import coo_matrix

from recbole.data.dataset import Dataset
from recbole.utils import FeatureSource, FeatureType
from recbole.utils.utils import set_color
from recbole.utils import FeatureSource, FeatureType, set_color
from recbole.utils.url import decide_download, download_url, extract_zip


Expand Down
3 changes: 1 addition & 2 deletions recbole/data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@

from recbole.data.dataloader import *
from recbole.sampler import KGSampler, Sampler, RepeatableSampler
from recbole.utils import ModelType, ensure_dir, get_local_time
from recbole.utils.utils import set_color
from recbole.utils import ModelType, ensure_dir, get_local_time, set_color


def create_dataset(config):
Expand Down
3 changes: 1 addition & 2 deletions recbole/model/abstract_recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
import torch.nn as nn

from recbole.model.layers import FMEmbedding, FMFirstOrderLinear
from recbole.utils import ModelType, InputType, FeatureSource, FeatureType
from recbole.utils.utils import set_color
from recbole.utils import ModelType, InputType, FeatureSource, FeatureType, set_color


class AbstractRecommender(nn.Module):
Expand Down
11 changes: 3 additions & 8 deletions recbole/properties/dataset/sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,10 @@ additional_feat_suffix: ~

# Filtering
rm_dup_inter: ~
lowest_val: ~
highest_val: ~
equal_val: ~
not_equal_val: ~
val_interval: ~
filter_inter_by_user_or_item: True
max_user_inter_num: ~
min_user_inter_num: 0
max_item_inter_num: ~
min_item_inter_num: 0
item_inter_num_interval: "[0,inf)"
user_inter_num_interval: "[0,inf)"

# Preprocessing
alias_of_user_id: ~
Expand Down
3 changes: 1 addition & 2 deletions recbole/quick_start/quick_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.utils import init_logger, get_model, get_trainer, init_seed
from recbole.utils.utils import set_color
from recbole.utils import init_logger, get_model, get_trainer, init_seed, set_color


def run_recbole(model=None, dataset=None, config_file_list=None, config_dict=None, saved=True):
Expand Down
48 changes: 18 additions & 30 deletions recbole/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@
from recbole.data.interaction import Interaction
from recbole.evaluator import ProxyEvaluator
from recbole.utils import ensure_dir, get_local_time, early_stopping, calculate_valid_score, dict2str, \
DataLoaderType, KGDataLoaderState
from recbole.utils.utils import set_color
DataLoaderType, KGDataLoaderState, get_tensorboard, set_color


class AbstractTrainer(object):
Expand Down Expand Up @@ -77,6 +76,7 @@ def __init__(self, config, model):
super(Trainer, self).__init__(config, model)

self.logger = getLogger()
self.tensorboard = get_tensorboard(self.logger)
self.learner = config['learner']
self.learning_rate = config['learning_rate']
self.epochs = config['epochs']
Expand All @@ -92,7 +92,6 @@ def __init__(self, config, model):
saved_model_file = '{}-{}.pth'.format(self.config['model'], get_local_time())
self.saved_model_file = os.path.join(self.checkpoint_dir, saved_model_file)
self.weight_decay = config['weight_decay']
self.draw_loss_pic = config['draw_loss_pic']

self.start_epoch = 0
self.cur_step = 0
Expand Down Expand Up @@ -245,6 +244,13 @@ def _generate_train_loss_output(self, epoch_idx, s_time, e_time, losses):
train_loss_output += set_color('train loss', 'blue') + ': ' + des % losses
return train_loss_output + ']'

def _add_train_loss_to_tensorboard(self, epoch_idx, losses, tag='Loss/Train'):
    """Write the training loss of one epoch to the TensorBoard writer.

    Args:
        epoch_idx: value passed to ``add_scalar`` as the step.
        losses: either a single loss value or a tuple of loss values.
            A tuple produces one scalar per element, tagged with ``tag``
            followed by the element's position (``tag + '0'``, ``tag + '1'``, ...).
        tag (str, optional): scalar tag (or tag prefix for a tuple of losses).
    """
    writer = self.tensorboard

    # Single loss value: one scalar under the plain tag.
    if not isinstance(losses, tuple):
        writer.add_scalar(tag, losses, epoch_idx)
        return

    # Several loss components: one scalar per component, suffixed by its index.
    for position, component in enumerate(losses):
        writer.add_scalar(tag + str(position), component, epoch_idx)

def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progress=False, callback_fn=None):
r"""Train the model based on the train data and the valid data.
Expand Down Expand Up @@ -274,6 +280,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
self._generate_train_loss_output(epoch_idx, training_start_time, training_end_time, train_loss)
if verbose:
self.logger.info(train_loss_output)
self._add_train_loss_to_tensorboard(epoch_idx, train_loss)

# eval
if self.eval_step <= 0 or not valid_data:
Expand Down Expand Up @@ -301,6 +308,8 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
if verbose:
self.logger.info(valid_score_output)
self.logger.info(valid_result_output)
self.tensorboard.add_scalar('Vaild_score', valid_score, epoch_idx)

if update_flag:
if saved:
self._save_checkpoint(epoch_idx)
Expand All @@ -318,9 +327,6 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
if verbose:
self.logger.info(stop_output)
break
if self.draw_loss_pic:
save_path = '{}-{}-train_loss.pdf'.format(self.config['model'], get_local_time())
self.plot_train_loss(save_path=os.path.join(save_path))
return self.best_valid_score, self.best_valid_result

def _full_sort_batch_eval(self, batched_data):
Expand Down Expand Up @@ -425,30 +431,6 @@ def _spilt_predict(self, interaction, batch_size):
result_list.append(result)
return torch.cat(result_list, dim=0)

def plot_train_loss(self, show=True, save_path=None):
    r"""Plot the train loss in each epoch.

    The losses are read from ``self.train_loss_dict`` (epoch -> loss) and the
    figure title is built from ``self.config['model']`` and the current local time.

    Args:
        show (bool, optional): Whether to show this figure, default: True
        save_path (str, optional): The data path to save the figure, default: None.
            If it's None, it will not be saved.
    """
    import matplotlib.pyplot as plt
    import time

    epochs = sorted(self.train_loss_dict)
    values = [float(self.train_loss_dict[epoch]) for epoch in epochs]
    plt.plot(epochs, values)

    # Aim for roughly ten x ticks. The step is clamped to 1 because with fewer
    # than ten epochs ``len(epochs) // 10`` is 0 and np.arange rejects a zero step.
    tick_step = max(1, len(epochs) // 10)
    plt.xticks(np.arange(0, len(epochs), tick_step))

    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(self.config['model'] + ' ' + time.strftime("%Y-%m-%d %H:%M", time.localtime(time.time())))

    # Save before showing: once an interactive window opened by show() is
    # closed, a later savefig() can end up writing an empty figure.
    if save_path:
        plt.savefig(save_path)
    if show:
        plt.show()


class KGTrainer(Trainer):
r"""KGTrainer is designed for Knowledge-aware recommendation methods. Some of these models need to train the
Expand Down Expand Up @@ -543,6 +525,7 @@ def pretrain(self, train_data, verbose=True, show_progress=False):
self._generate_train_loss_output(epoch_idx, training_start_time, training_end_time, train_loss)
if verbose:
self.logger.info(train_loss_output)
self._add_train_loss_to_tensorboard(epoch_idx, train_loss)

if (epoch_idx + 1) % self.config['save_step'] == 0:
saved_model_file = os.path.join(
Expand Down Expand Up @@ -614,6 +597,7 @@ def __init__(self, config, model):
super(DecisionTreeTrainer, self).__init__(config, model)

self.logger = getLogger()
self.tensorboard = get_tensorboard(self.logger)
self.label_field = config['LABEL_FIELD']
self.convert_token_to_onehot = self.config['convert_token_to_onehot']

Expand Down Expand Up @@ -718,6 +702,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
if verbose:
self.logger.info(valid_score_output)
self.logger.info(valid_result_output)
self.tensorboard.add_scalar('Vaild_score', valid_score, epoch_idx)

self.best_valid_score = valid_score
self.best_valid_result = valid_result
Expand Down Expand Up @@ -837,6 +822,7 @@ def pretrain(self, train_data, verbose=True, show_progress=False):
self._generate_train_loss_output(epoch_idx, training_start_time, training_end_time, train_loss)
if verbose:
self.logger.info(train_loss_output)
self._add_train_loss_to_tensorboard(epoch_idx, train_loss)

if (epoch_idx + 1) % self.pretrain_epochs == 0:
saved_model_file = os.path.join(
Expand Down Expand Up @@ -1012,6 +998,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
self._generate_train_loss_output(epoch_idx, training_start_time, training_end_time, train_loss)
if verbose:
self.logger.info(train_loss_output)
self._add_train_loss_to_tensorboard(epoch_idx, train_loss)

# eval
if self.eval_step <= 0 or not valid_data:
Expand Down Expand Up @@ -1039,6 +1026,7 @@ def fit(self, train_data, valid_data=None, verbose=True, saved=True, show_progre
if verbose:
self.logger.info(valid_score_output)
self.logger.info(valid_result_output)
self.tensorboard.add_scalar('Vaild_score', valid_score, epoch_idx)
if update_flag:
if saved:
self._save_checkpoint(epoch_idx)
Expand Down
6 changes: 3 additions & 3 deletions recbole/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from recbole.utils.logger import init_logger
from recbole.utils.logger import init_logger, set_color
from recbole.utils.utils import get_local_time, ensure_dir, get_model, get_trainer, \
early_stopping, calculate_valid_score, dict2str, init_seed
early_stopping, calculate_valid_score, dict2str, init_seed, get_tensorboard
from recbole.utils.enum_type import *
from recbole.utils.argument_list import *

__all__ = [
'init_logger', 'get_local_time', 'ensure_dir', 'get_model', 'get_trainer', 'early_stopping',
'calculate_valid_score', 'dict2str', 'Enum', 'ModelType', 'DataLoaderType', 'KGDataLoaderState', 'EvaluatorType',
'InputType', 'FeatureType', 'FeatureSource', 'init_seed', 'general_arguments', 'training_arguments',
'evaluation_arguments', 'dataset_arguments'
'evaluation_arguments', 'dataset_arguments', 'get_tensorboard', 'set_color'
]
4 changes: 2 additions & 2 deletions recbole/utils/argument_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
'ITEM_LIST_LENGTH_FIELD', 'LIST_SUFFIX', 'MAX_ITEM_LIST_LENGTH', 'POSITION_FIELD',
'HEAD_ENTITY_ID_FIELD', 'TAIL_ENTITY_ID_FIELD', 'RELATION_ID_FIELD', 'ENTITY_ID_FIELD',
'load_col', 'unload_col', 'unused_col', 'additional_feat_suffix',
'max_user_inter_num', 'min_user_inter_num', 'max_item_inter_num', 'min_item_inter_num',
'lowest_val', 'highest_val', 'equal_val', 'not_equal_val',
'user_inter_num_interval', 'item_inter_num_interval ',
'val_interval',
'alias_of_user_id', 'alias_of_item_id', 'alias_of_entity_id', 'alias_of_relation_id',
'preload_weight',
'normalize_field', 'normalize_all'
Expand Down
29 changes: 28 additions & 1 deletion recbole/utils/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import logging
import os
import colorlog
import re

from recbole.utils.utils import get_local_time, ensure_dir
from colorama import init
Expand All @@ -28,6 +29,29 @@
}


class RemoveColorFilter(logging.Filter):
    """Logging filter that strips ANSI escape sequences from a record's message.

    The filter never rejects a record; it only rewrites ``record.msg`` in place
    (after converting it to ``str``) so that color codes do not reach the output
    of whatever it is attached to.
    """

    # Compiled once for the class instead of once per log record.
    _ANSI_ESCAPE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

    def filter(self, record):
        """Remove ANSI escape sequences from ``record.msg``.

        Args:
            record: the log record being processed; a falsy value is left untouched.

        Returns:
            bool: always ``True``, so the record is never dropped.
        """
        if record:
            record.msg = self._ANSI_ESCAPE.sub('', str(record.msg))
        return True


def set_color(log, color, highlight=True):
    """Wrap ``log`` in ANSI escape codes so it is printed in the given color.

    Args:
        log (str): text to colorize.
        color (str): one of 'black', 'red', 'green', 'yellow', 'blue', 'pink',
            'cyan', 'white'. Any other value falls back to 'white'.
        highlight (bool, optional): use the ``1;3x`` (bold/bright) prefix when
            ``True``, the ``0;3x`` (normal) prefix otherwise. Defaults to ``True``.

    Returns:
        str: ``log`` preceded by the color escape sequence and followed by the
        reset sequence ``\\033[0m``.
    """
    color_set = ['black', 'red', 'green', 'yellow', 'blue', 'pink', 'cyan', 'white']
    try:
        index = color_set.index(color)
    except ValueError:
        # Unknown color name: use the last entry ('white'). Only ValueError is
        # caught so that unrelated errors (e.g. KeyboardInterrupt) are not hidden.
        index = len(color_set) - 1
    style = '1;3' if highlight else '0;3'
    return '\033[' + style + str(index) + 'm' + log + '\033[0m'


def init_logger(config):
"""
A logger that can show a message on standard output and write it into the
Expand Down Expand Up @@ -70,12 +94,15 @@ def init_logger(config):
level = logging.CRITICAL
else:
level = logging.INFO

fh = logging.FileHandler(logfilepath)
fh.setLevel(level)
fh.setFormatter(fileformatter)
remove_color_filter = RemoveColorFilter()
fh.addFilter(remove_color_filter)

sh = logging.StreamHandler()
sh.setLevel(level)
sh.setFormatter(sformatter)

logging.basicConfig(level=level, handlers=[fh, sh])
logging.basicConfig(level=level, handlers=[sh, fh])
39 changes: 26 additions & 13 deletions recbole/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter

from recbole.utils.enum_type import ModelType

Expand Down Expand Up @@ -192,16 +193,28 @@ def init_seed(seed, reproducibility):
torch.backends.cudnn.deterministic = False


def set_color(log, color, highlight=True):
    """Return ``log`` surrounded by ANSI escape codes for the requested color.

    Args:
        log (str): text to colorize.
        color (str): one of 'black', 'red', 'green', 'yellow', 'blue', 'pink',
            'cyan', 'white'. Any other value falls back to 'white'.
        highlight (bool, optional): use the ``1;3x`` prefix when ``True`` and the
            ``0;3x`` prefix otherwise. Defaults to ``True``.

    Returns:
        str: the escape sequence, then ``log``, then the reset sequence ``\\033[0m``.
    """
    color_set = ['black', 'red', 'green', 'yellow', 'blue', 'pink', 'cyan', 'white']
    try:
        index = color_set.index(color)
    except ValueError:
        # list.index raises ValueError for an unknown name; fall back to the
        # last entry ('white') without swallowing unrelated exceptions.
        index = len(color_set) - 1
    prefix = '\033[' + ('1;3' if highlight else '0;3') + str(index) + 'm'
    return prefix + log + '\033[0m'
def get_tensorboard(logger):
    r"""Build a TensorBoard ``SummaryWriter`` whose log directory follows the logger's file name.

    The handlers of ``logger`` are scanned in order; the first one exposing a
    ``baseFilename`` attribute decides the directory name, which is the part of
    that file's base name before the first ``.``. When no handler has such an
    attribute, the name is ``model-<local time>`` instead.

    Args:
        logger: logger whose handlers are inspected for an output file name.

    Returns:
        SummaryWriter: writer created on ``log_tensorboard/<directory name>``.
    """
    # Lazily yields a candidate name for every handler that writes to a file.
    file_stems = (
        os.path.basename(getattr(handler, 'baseFilename')).split('.')[0]
        for handler in logger.handlers
        if hasattr(handler, "baseFilename")
    )
    dir_name = next(file_stems, None)

    # No file-backed handler: fall back to a time-based name.
    if dir_name is None:
        dir_name = '{}-{}'.format('model', get_local_time())

    return SummaryWriter(os.path.join('log_tensorboard', dir_name))
Loading

0 comments on commit 1090915

Please sign in to comment.