From 190b6e1df83e6636313ae3b812e967771c1b736f Mon Sep 17 00:00:00 2001
From: BDonnot
Date: Tue, 16 Jun 2020 12:02:49 +0200
Subject: [PATCH] updating documentation to further improve issues #12 and #14

---
 docs/DeepQSimple.rst                    |  1 +
 docs/DuelQLeapNet.rst                   |  2 ++
 docs/DuelQSimple.rst                    |  1 +
 docs/SAC.rst                            |  1 +
 l2rpn_baselines/DeepQSimple/train.py    | 13 +++++--------
 l2rpn_baselines/DuelQLeapNet/train.py   | 18 ++++++++----------
 l2rpn_baselines/DuelQSimple/train.py    | 13 +++++--------
 l2rpn_baselines/SAC/train.py            | 12 ++++--------
 l2rpn_baselines/test/test_train_eval.py |  8 +++-----
 l2rpn_baselines/utils/BaseDeepQ.py      |  7 +++++--
 l2rpn_baselines/utils/DeepQAgent.py     |  2 +-
 l2rpn_baselines/utils/TrainingParam.py  |  6 ------
 l2rpn_baselines/utils/make_multi_env.py | 10 +++++++++-
 13 files changed, 45 insertions(+), 49 deletions(-)

diff --git a/docs/DeepQSimple.rst b/docs/DeepQSimple.rst
index 6e5376b..b572462 100644
--- a/docs/DeepQSimple.rst
+++ b/docs/DeepQSimple.rst
@@ -6,6 +6,7 @@ Description
 This file serves as an concrete example on how to implement a baseline, even more concretely than
 the "do nothing" baseline. Don't expect to obtain state of the art method with this simple method however.
+An example of how to train this model is available in the documentation of the train function: :ref:`Example-deepqsimple`.
 
 Exported class
 --------------
diff --git a/docs/DuelQLeapNet.rst b/docs/DuelQLeapNet.rst
index a5bdd6d..37dfdbd 100644
--- a/docs/DuelQLeapNet.rst
+++ b/docs/DuelQLeapNet.rst
@@ -13,6 +13,8 @@ powerlines based on the injection and the topology.
 In this baseline, we use this very same architecture to model the Q function. The D3QN RL method
 is used.
 
+An example of how to train this model is available in the documentation of the train function: :ref:`Example-leapnet`.
+
 Exported class
 --------------
 You can use this class with:
diff --git a/docs/DuelQSimple.rst b/docs/DuelQSimple.rst
index be0ea8b..947a727 100644
--- a/docs/DuelQSimple.rst
+++ b/docs/DuelQSimple.rst
@@ -8,6 +8,7 @@ Description
 This file serves as an concrete example on how to implement a baseline, even more concretely than
 the "do nothing" baseline. Don't expect to obtain state of the art method with this simple method however.
+An example of how to train this model is available in the documentation of the train function: :ref:`Example-duelqsimple`.
 
 Exported class
 --------------
diff --git a/docs/SAC.rst b/docs/SAC.rst
index 92f0690..772368e 100644
--- a/docs/SAC.rst
+++ b/docs/SAC.rst
@@ -9,6 +9,7 @@ Description
 -----------
 This module proposes an implementation of the SAC algorithm.
 
+An example of how to train this model is available in the documentation of the train function: :ref:`Example-sac`.
 
 Exported class
 --------------
diff --git a/l2rpn_baselines/DeepQSimple/train.py b/l2rpn_baselines/DeepQSimple/train.py
index d500fd7..3089845 100755
--- a/l2rpn_baselines/DeepQSimple/train.py
+++ b/l2rpn_baselines/DeepQSimple/train.py
@@ -26,7 +26,6 @@ def train(env,
           save_path=None,
           load_path=None,
           logs_dir=None,
-          nb_env=1,
           training_param=None,
           filter_action_fun=None,
           kwargs_converters={},
@@ -56,10 +55,6 @@ def train(env,
     logs_dir: ``str``
         Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
 
-    nb_env: ``int``
-        Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also,
-        if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
-
     training_param: :class:`l2rpn_baselines.utils.TrainingParam`
         The parameters describing the way you will train your model.
@@ -84,9 +79,13 @@ def train(env,
     baseline: :class:`DeepQSimple`
         The trained baseline.
 
+
+    .. _Example-deepqsimple:
+
     Examples
     ---------
-    Here is an example on how to train a DeepSimple baseline.
+
+    Here is an example of how to train a DeepQSimple baseline.
 
     First define a python script, for example
 
@@ -134,7 +133,6 @@ def train(env,
               save_path="/WHERE/I/SAVED/THE/MODEL",
               load_path=None,
               logs_dir="/WHERE/I/SAVED/THE/LOGS",
-              nb_env=1,
               training_param=tp,
               kwargs_converters=kwargs_converters,
               kwargs_archi=kwargs_archi)
@@ -177,7 +175,6 @@ def train(env,
                          nn_archi=nn_archi,
                          name=name,
                          istraining=True,
-                         nb_env=nb_env,
                          verbose=verbose,
                          **kwargs_converters
                          )
diff --git a/l2rpn_baselines/DuelQLeapNet/train.py b/l2rpn_baselines/DuelQLeapNet/train.py
index bd5568f..f507937 100755
--- a/l2rpn_baselines/DuelQLeapNet/train.py
+++ b/l2rpn_baselines/DuelQLeapNet/train.py
@@ -26,7 +26,6 @@ def train(env,
           save_path=None,
           load_path=None,
           logs_dir=None,
-          nb_env=1,
           training_param=None,
           filter_action_fun=None,
           verbose=True,
@@ -56,10 +55,6 @@ def train(env,
     logs_dir: ``str``
         Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
 
-    nb_env: ``int``
-        Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also,
-        if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
-
     training_param: :class:`l2rpn_baselines.utils.TrainingParam`
         The parameters describing the way you will train your model.
 
@@ -84,9 +79,12 @@ def train(env,
     baseline: :class:`DuelQLeapNet`
         The trained baseline.
 
+
+    .. _Example-leapnet:
+
     Examples
     ---------
-    Here is an example on how to train a DeepSimple baseline.
+    Here is an example of how to train a DuelQLeapNet baseline.
 
     First define a python script, for example
 
@@ -141,14 +139,15 @@ def train(env,
         }
     # define the name of the model
     nm_ = "AnneOnymous"
+    save_path = "/WHERE/I/SAVED/THE/MODEL"
+    logs_dir = "/WHERE/I/SAVED/THE/LOGS"
     try:
         train(env,
               name=nm_,
               iterations=10000,
-              save_path="/WHERE/I/SAVED/THE/MODEL",
+              save_path=save_path,
               load_path=None,
-              logs_dir="/WHERE/I/SAVED/THE/LOGS",
-              nb_env=1,
+              logs_dir=logs_dir,
               training_param=tp,
               kwargs_converters=kwargs_converters,
               kwargs_archi=kwargs_archi)
@@ -191,7 +190,6 @@ def train(env,
                          nn_archi=nn_archi,
                          name=name,
                          istraining=True,
-                         nb_env=nb_env,
                          filter_action_fun=filter_action_fun,
                          verbose=verbose,
                          **kwargs_converters
                          )
diff --git a/l2rpn_baselines/DuelQSimple/train.py b/l2rpn_baselines/DuelQSimple/train.py
index d7a8f29..239f851 100755
--- a/l2rpn_baselines/DuelQSimple/train.py
+++ b/l2rpn_baselines/DuelQSimple/train.py
@@ -26,7 +26,6 @@ def train(env,
           save_path=None,
           load_path=None,
           logs_dir=None,
-          nb_env=1,
           training_param=None,
           filter_action_fun=None,
           verbose=True,
@@ -57,10 +56,6 @@ def train(env,
     logs_dir: ``str``
         Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
 
-    nb_env: ``int``
-        Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also,
-        if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
-
     verbose: ``bool``
         If you want something to be printed on the terminal (a better logging strategy will be put at some point)
 
@@ -85,9 +80,13 @@ def train(env,
     baseline: :class:`DeepQSimple`
         The trained baseline.
 
+
+    .. _Example-duelqsimple:
+
     Examples
     ---------
-    Here is an example on how to train a DeepSimple baseline.
+
+    Here is an example of how to train a DuelQSimple baseline.
 
     First define a python script, for example
 
@@ -135,7 +134,6 @@ def train(env,
               save_path="/WHERE/I/SAVED/THE/MODEL",
               load_path=None,
               logs_dir="/WHERE/I/SAVED/THE/LOGS",
-              nb_env=1,
               training_param=tp,
               kwargs_converters=kwargs_converters,
               kwargs_archi=kwargs_archi)
@@ -178,7 +176,6 @@ def train(env,
                          nn_archi=nn_archi,
                          name=name,
                          istraining=True,
-                         nb_env=nb_env,
                          verbose=verbose,
                          **kwargs_converters
                          )
diff --git a/l2rpn_baselines/SAC/train.py b/l2rpn_baselines/SAC/train.py
index df1d773..2262f5c 100755
--- a/l2rpn_baselines/SAC/train.py
+++ b/l2rpn_baselines/SAC/train.py
@@ -26,7 +26,6 @@ def train(env,
           save_path=None,
           load_path=None,
           logs_dir=None,
-          nb_env=1,
           training_param=None,
           filter_action_fun=None,
           verbose=True,
@@ -56,10 +55,6 @@ def train(env,
     logs_dir: ``str``
         Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
 
-    nb_env: ``int``
-        Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also,
-        if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
-
     verbose: ``bool``
         If you want something to be printed on the terminal (a better logging strategy will be put at some point)
 
@@ -84,9 +79,12 @@ def train(env,
     baseline: :class:`DeepQSimple`
         The trained baseline.
 
+
+    .. _Example-sac:
+
     Examples
     ---------
-    Here is an example on how to train a DeepSimple baseline.
+    Here is an example of how to train a SAC baseline.
 
     First define a python script, for example
 
@@ -141,7 +139,6 @@ def train(env,
               save_path="/WHERE/I/SAVED/THE/MODEL",
               load_path=None,
               logs_dir="/WHERE/I/SAVED/THE/LOGS",
-              nb_env=1,
               training_param=tp,
               kwargs_converters=kwargs_converters,
               kwargs_archi=kwargs_archi)
@@ -184,7 +181,6 @@ def train(env,
                          nn_archi=nn_archi,
                          name=name,
                          istraining=True,
-                         nb_env=nb_env,
                          verbose=verbose,
                          **kwargs_converters
                          )
diff --git a/l2rpn_baselines/test/test_train_eval.py b/l2rpn_baselines/test/test_train_eval.py
index 98e5ce0..f867286 100644
--- a/l2rpn_baselines/test/test_train_eval.py
+++ b/l2rpn_baselines/test/test_train_eval.py
@@ -11,10 +11,12 @@
 import unittest
 import warnings
 import tempfile
+import logging
 
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+logging.getLogger('tensorflow').setLevel(logging.FATAL)
+
 import grid2op
-from grid2op.Environment import MultiEnvironment
 from l2rpn_baselines.utils import TrainingParam, NNParam, make_multi_env
 
 from l2rpn_baselines.DeepQSimple import train as train_dqn
@@ -69,7 +71,6 @@ def test_train_eval(self):
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
-                      nb_env=1,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
@@ -167,7 +168,6 @@ def test_train_eval(self):
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
-                      nb_env=1,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
@@ -224,7 +224,6 @@ def test_train_eval(self):
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
-                      nb_env=1,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
@@ -282,7 +281,6 @@ def test_train_eval(self):
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
-                      nb_env=1,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
diff --git a/l2rpn_baselines/utils/BaseDeepQ.py b/l2rpn_baselines/utils/BaseDeepQ.py
index d21c811..2e1d8d8 100644
--- a/l2rpn_baselines/utils/BaseDeepQ.py
+++ b/l2rpn_baselines/utils/BaseDeepQ.py
@@ -71,10 +71,12 @@ class BaseDeepQ(ABC):
     def __init__(self,
                  nn_params,
-                 training_param=None):
+                 training_param=None,
+                 verbose=False):
         self._action_size = nn_params.action_size
         self._observation_size = nn_params.observation_size
         self._nn_archi = nn_params
+        self.verbose = verbose
 
         if training_param is None:
             self._training_param = TrainingParam()
@@ -207,7 +209,8 @@ def load_network(self, path, name=None, ext="h5"):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore")
             self._target_model = load_model('{}.{}'.format(path_target_model, ext), custom_objects=self._custom_objects)
-        print("Succesfully loaded network.")
+        if self.verbose:
+            print("Successfully loaded network.")
 
     def target_train(self):
         """
diff --git a/l2rpn_baselines/utils/DeepQAgent.py b/l2rpn_baselines/utils/DeepQAgent.py
index 535fb81..f8334e0 100644
--- a/l2rpn_baselines/utils/DeepQAgent.py
+++ b/l2rpn_baselines/utils/DeepQAgent.py
@@ -353,7 +353,7 @@ def train(self,
 
         Parameters
         ----------
-        env: :class:`grid2op.Environment.Environment`
+        env: :class:`grid2op.Environment.Environment` or :class:`grid2op.Environment.MultiEnvironment`
            The environment used to train your model.
 
        iterations: ``int``
diff --git a/l2rpn_baselines/utils/TrainingParam.py b/l2rpn_baselines/utils/TrainingParam.py
index c186220..df46c5e 100644
--- a/l2rpn_baselines/utils/TrainingParam.py
+++ b/l2rpn_baselines/utils/TrainingParam.py
@@ -189,34 +189,28 @@ def __init__(self,
 
     @property
     def final_epsilon(self):
-        """return the final epsilon allowed by this instance"""
         return self._final_epsilon
 
     @final_epsilon.setter
     def final_epsilon(self, final_epsilon):
-        """used to update the final_epsilon"""
         self._final_epsilon = final_epsilon
         self._compute_exp_facto()
 
     @property
     def initial_epsilon(self):
-        """get the intial epsilon used for epsilon greedy"""
         return self._initial_epsilon
 
     @initial_epsilon.setter
     def initial_epsilon(self, initial_epsilon):
-        """used to update the initial_epsilon attribute"""
         self._initial_epsilon = initial_epsilon
         self._compute_exp_facto()
 
     @property
     def update_nb_iter(self):
-        """update the total number of iteration you want to make"""
         return self._update_nb_iter
 
     @update_nb_iter.setter
     def update_nb_iter(self, update_nb_iter):
-        """update the total number of iteration you want to make"""
         self._update_nb_iter = update_nb_iter
         if self._update_nb_iter is not None and self._update_nb_iter > 0:
             self._1_update_nb_iter = 1.0 / self._update_nb_iter
diff --git a/l2rpn_baselines/utils/make_multi_env.py b/l2rpn_baselines/utils/make_multi_env.py
index f157277..6222f28 100644
--- a/l2rpn_baselines/utils/make_multi_env.py
+++ b/l2rpn_baselines/utils/make_multi_env.py
@@ -7,7 +7,15 @@
 # This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions.
 
 import warnings
-from grid2op.Environment import MultiEnvironment, Environment
+from grid2op.Environment import Environment
+try:
+    from grid2op.Environment import MultiEnvironment
+except ImportError:
+    # the name changes as of grid2op >= 1.0.0
+    try:
+        from grid2op.Environment import MultiEnvMultiProcess as MultiEnvironment
+    except ImportError as exc:
+        raise exc
 
 
 def make_multi_env(env_init, nb_env):
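
After this patch, the number of parallel environments is no longer passed to train() through nb_env: parallelism is carried by the env argument itself, built with make_multi_env. Below is a minimal sketch of a training script against the patched API; the grid2op environment name and the paths are placeholders, and kwargs_converters / kwargs_archi are left empty because their full values depend on the baseline (see the docstring examples in the hunks above).

    import grid2op
    from l2rpn_baselines.utils import TrainingParam, make_multi_env
    from l2rpn_baselines.DeepQSimple import train

    # placeholder environment name: use whichever grid2op environment you train on
    env_init = grid2op.make("rte_case14_realistic")
    # with this patch, parallelism comes from the env object, not an nb_env argument
    env = make_multi_env(env_init, nb_env=4)

    tp = TrainingParam()
    try:
        train(env,
              name="AnneOnymous",
              iterations=10000,
              save_path="/WHERE/I/SAVED/THE/MODEL",
              load_path=None,
              logs_dir="/WHERE/I/SAVED/THE/LOGS",
              training_param=tp,
              kwargs_converters={},  # elided: see the docstring examples above
              kwargs_archi={})       # elided: see the docstring examples above
    finally:
        env_init.close()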
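
The TrainingParam setters shown above re-run _compute_exp_facto() whenever initial_epsilon or final_epsilon changes, i.e. the exponential decay factor of the epsilon-greedy schedule is cached and kept in sync with its endpoints. The actual formula is not part of this patch; the following is an illustrative, self-contained re-creation of that structure with one plausible decay rule, not the library's code.

    import numpy as np

    class EpsilonSchedule:
        """Illustrative stand-in, not the actual TrainingParam class."""
        def __init__(self, initial_epsilon=0.4, final_epsilon=1e-4, nb_iter=10000):
            self._initial_epsilon = initial_epsilon
            self._final_epsilon = final_epsilon
            self._nb_iter = nb_iter
            self._compute_exp_facto()

        def _compute_exp_facto(self):
            # cache a decay rate such that epsilon(nb_iter) == final_epsilon exactly
            self._exp_facto = np.log(self._initial_epsilon / self._final_epsilon) / self._nb_iter

        @property
        def final_epsilon(self):
            return self._final_epsilon

        @final_epsilon.setter
        def final_epsilon(self, value):
            # mirror the patch: changing an endpoint refreshes the cached factor
            self._final_epsilon = value
            self._compute_exp_facto()

        def epsilon(self, step):
            # exponential decay from initial_epsilon towards final_epsilon
            return max(self._final_epsilon,
                       self._initial_epsilon * np.exp(-step * self._exp_facto))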