From 190b6e1df83e6636313ae3b812e967771c1b736f Mon Sep 17 00:00:00 2001
From: BDonnot
Date: Tue, 16 Jun 2020 12:02:49 +0200
Subject: [PATCH] updating documentation to further improve issues #12 and #14

---
 docs/DeepQSimple.rst                    |  1 +
 docs/DuelQLeapNet.rst                   |  2 ++
 docs/DuelQSimple.rst                    |  1 +
 docs/SAC.rst                            |  1 +
 l2rpn_baselines/DeepQSimple/train.py    | 13 +++++--------
 l2rpn_baselines/DuelQLeapNet/train.py   | 18 ++++++++----------
 l2rpn_baselines/DuelQSimple/train.py    | 13 +++++--------
 l2rpn_baselines/SAC/train.py            | 12 ++++--------
 l2rpn_baselines/test/test_train_eval.py |  8 +++-----
 l2rpn_baselines/utils/BaseDeepQ.py      |  7 +++++--
 l2rpn_baselines/utils/DeepQAgent.py     |  2 +-
 l2rpn_baselines/utils/TrainingParam.py  |  6 ------
 l2rpn_baselines/utils/make_multi_env.py | 10 +++++++++-
 13 files changed, 45 insertions(+), 49 deletions(-)

diff --git a/docs/DeepQSimple.rst b/docs/DeepQSimple.rst
index 6e5376b..b572462 100644
--- a/docs/DeepQSimple.rst
+++ b/docs/DeepQSimple.rst
@@ -6,6 +6,7 @@ Description
 This file serves as an concrete example on how to implement a baseline, even more concretely than
 the "do nothing" baseline. Don't expect to obtain state of the art method with this simple method however.
+An example of how to train this model is available in the documentation of the train function: :ref:`Example-deepqsimple`.
 
 Exported class
 --------------
diff --git a/docs/DuelQLeapNet.rst b/docs/DuelQLeapNet.rst
index a5bdd6d..37dfdbd 100644
--- a/docs/DuelQLeapNet.rst
+++ b/docs/DuelQLeapNet.rst
@@ -13,6 +13,8 @@ powerlines based on the injection and the topology.
 In this baseline, we use this very same architecture to model the Q function. The D3QN RL method
 is used.
 
+An example of how to train this model is available in the documentation of the train function: :ref:`Example-leapnet`.
+
 Exported class
 --------------
 You can use this class with:
diff --git a/docs/DuelQSimple.rst b/docs/DuelQSimple.rst
index be0ea8b..947a727 100644
--- a/docs/DuelQSimple.rst
+++ b/docs/DuelQSimple.rst
@@ -8,6 +8,7 @@ Description
 This file serves as an concrete example on how to implement a baseline, even more concretely than
 the "do nothing" baseline. Don't expect to obtain state of the art method with this simple method however.
+An example of how to train this model is available in the documentation of the train function: :ref:`Example-duelqsimple`.
 
 Exported class
 --------------
diff --git a/docs/SAC.rst b/docs/SAC.rst
index 92f0690..772368e 100644
--- a/docs/SAC.rst
+++ b/docs/SAC.rst
@@ -9,6 +9,7 @@ Description
 -----------
 This module proposes an implementation of the SAC algorithm.
 
+An example of how to train this model is available in the documentation of the train function: :ref:`Example-sac`.
 
 Exported class
 --------------
diff --git a/l2rpn_baselines/DeepQSimple/train.py b/l2rpn_baselines/DeepQSimple/train.py
index d500fd7..3089845 100755
--- a/l2rpn_baselines/DeepQSimple/train.py
+++ b/l2rpn_baselines/DeepQSimple/train.py
@@ -26,7 +26,6 @@ def train(env,
           save_path=None,
           load_path=None,
           logs_dir=None,
-          nb_env=1,
           training_param=None,
           filter_action_fun=None,
           kwargs_converters={},
@@ -56,10 +55,6 @@ def train(env,
     logs_dir: ``str``
         Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
 
-    nb_env: ``int``
-        Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also,
-        if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
-
     training_param: :class:`l2rpn_baselines.utils.TrainingParam`
         The parameters describing the way you will train your model.
@@ -84,9 +79,13 @@ def train(env,
     baseline: :class:`DeepQSimple`
         The trained baseline.
 
+
+    .. _Example-deepqsimple:
+
     Examples
     ---------
-    Here is an example on how to train a DeepSimple baseline.
+
+    Here is an example of how to train a DeepQSimple baseline.
 
     First define a python script, for example
 
@@ -134,7 +133,6 @@ def train(env,
               save_path="/WHERE/I/SAVED/THE/MODEL",
               load_path=None,
               logs_dir="/WHERE/I/SAVED/THE/LOGS",
-              nb_env=1,
               training_param=tp,
               kwargs_converters=kwargs_converters,
               kwargs_archi=kwargs_archi)
@@ -177,7 +175,6 @@ def train(env,
                          nn_archi=nn_archi,
                          name=name,
                          istraining=True,
-                         nb_env=nb_env,
                          verbose=verbose,
                          **kwargs_converters
                          )
diff --git a/l2rpn_baselines/DuelQLeapNet/train.py b/l2rpn_baselines/DuelQLeapNet/train.py
index bd5568f..f507937 100755
--- a/l2rpn_baselines/DuelQLeapNet/train.py
+++ b/l2rpn_baselines/DuelQLeapNet/train.py
@@ -26,7 +26,6 @@ def train(env,
           save_path=None,
           load_path=None,
           logs_dir=None,
-          nb_env=1,
           training_param=None,
           filter_action_fun=None,
           verbose=True,
@@ -56,10 +55,6 @@ def train(env,
     logs_dir: ``str``
         Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
 
-    nb_env: ``int``
-        Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also,
-        if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
-
     training_param: :class:`l2rpn_baselines.utils.TrainingParam`
         The parameters describing the way you will train your model.
 
@@ -84,9 +79,12 @@ def train(env,
     baseline: :class:`DuelQLeapNet`
         The trained baseline.
 
+
+    .. _Example-leapnet:
+
     Examples
     ---------
-    Here is an example on how to train a DeepSimple baseline.
+    Here is an example of how to train a DuelQLeapNet baseline.
 
     First define a python script, for example
 
@@ -141,14 +139,15 @@ def train(env,
         }
     # define the name of the model
     nm_ = "AnneOnymous"
+    save_path = "/WHERE/I/SAVED/THE/MODEL"
+    logs_dir = "/WHERE/I/SAVED/THE/LOGS"
     try:
         train(env,
               name=nm_,
               iterations=10000,
-              save_path="/WHERE/I/SAVED/THE/MODEL",
+              save_path=save_path,
               load_path=None,
-              logs_dir="/WHERE/I/SAVED/THE/LOGS",
-              nb_env=1,
+              logs_dir=logs_dir,
               training_param=tp,
               kwargs_converters=kwargs_converters,
               kwargs_archi=kwargs_archi)
@@ -191,7 +190,6 @@ def train(env,
                          nn_archi=nn_archi,
                          name=name,
                          istraining=True,
-                         nb_env=nb_env,
                          filter_action_fun=filter_action_fun,
                          verbose=verbose,
                          **kwargs_converters
                          )
diff --git a/l2rpn_baselines/DuelQSimple/train.py b/l2rpn_baselines/DuelQSimple/train.py
index d7a8f29..239f851 100755
--- a/l2rpn_baselines/DuelQSimple/train.py
+++ b/l2rpn_baselines/DuelQSimple/train.py
@@ -26,7 +26,6 @@ def train(env,
           save_path=None,
           load_path=None,
           logs_dir=None,
-          nb_env=1,
           training_param=None,
           filter_action_fun=None,
           verbose=True,
@@ -57,10 +56,6 @@ def train(env,
     logs_dir: ``str``
         Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
 
-    nb_env: ``int``
-        Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also,
-        if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
-
     verbose: ``bool``
         If you want something to be printed on the terminal (a better logging strategy will be put at some point)
 
@@ -85,9 +80,13 @@ def train(env,
     baseline: :class:`DeepQSimple`
         The trained baseline.
 
+
+    .. _Example-duelqsimple:
+
     Examples
     ---------
-    Here is an example on how to train a DeepSimple baseline.
+
+    Here is an example of how to train a DuelQSimple baseline.
 
     First define a python script, for example
 
@@ -135,7 +134,6 @@ def train(env,
               save_path="/WHERE/I/SAVED/THE/MODEL",
               load_path=None,
               logs_dir="/WHERE/I/SAVED/THE/LOGS",
-              nb_env=1,
               training_param=tp,
               kwargs_converters=kwargs_converters,
               kwargs_archi=kwargs_archi)
@@ -178,7 +176,6 @@ def train(env,
                          nn_archi=nn_archi,
                          name=name,
                          istraining=True,
-                         nb_env=nb_env,
                          verbose=verbose,
                          **kwargs_converters
                          )
diff --git a/l2rpn_baselines/SAC/train.py b/l2rpn_baselines/SAC/train.py
index df1d773..2262f5c 100755
--- a/l2rpn_baselines/SAC/train.py
+++ b/l2rpn_baselines/SAC/train.py
@@ -26,7 +26,6 @@ def train(env,
           save_path=None,
           load_path=None,
           logs_dir=None,
-          nb_env=1,
           training_param=None,
           filter_action_fun=None,
           verbose=True,
@@ -56,10 +55,6 @@ def train(env,
     logs_dir: ``str``
         Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.
 
-    nb_env: ``int``
-        Number of environments used in parrallel. Note that if nb_env > 1, some functions might not be usable. Also,
-        if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.
-
     verbose: ``bool``
         If you want something to be printed on the terminal (a better logging strategy will be put at some point)
 
@@ -84,9 +79,12 @@ def train(env,
     baseline: :class:`DeepQSimple`
         The trained baseline.
 
+
+    .. _Example-sac:
+
     Examples
     ---------
-    Here is an example on how to train a DeepSimple baseline.
+    Here is an example of how to train a SAC baseline.
 
     First define a python script, for example
 
@@ -141,7 +139,6 @@ def train(env,
               save_path="/WHERE/I/SAVED/THE/MODEL",
               load_path=None,
               logs_dir="/WHERE/I/SAVED/THE/LOGS",
-              nb_env=1,
               training_param=tp,
               kwargs_converters=kwargs_converters,
               kwargs_archi=kwargs_archi)
@@ -184,7 +181,6 @@ def train(env,
                          nn_archi=nn_archi,
                          name=name,
                          istraining=True,
-                         nb_env=nb_env,
                          verbose=verbose,
                          **kwargs_converters
                          )
diff --git a/l2rpn_baselines/test/test_train_eval.py b/l2rpn_baselines/test/test_train_eval.py
index 98e5ce0..f867286 100644
--- a/l2rpn_baselines/test/test_train_eval.py
+++ b/l2rpn_baselines/test/test_train_eval.py
@@ -11,10 +11,12 @@
 import unittest
 import warnings
 import tempfile
+import logging
 
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+logging.getLogger('tensorflow').setLevel(logging.FATAL)
+
 import grid2op
-from grid2op.Environment import MultiEnvironment
 from l2rpn_baselines.utils import TrainingParam, NNParam, make_multi_env
 
 from l2rpn_baselines.DeepQSimple import train as train_dqn
@@ -69,7 +71,6 @@ def test_train_eval(self):
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
-                      nb_env=1,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
@@ -167,7 +168,6 @@ def test_train_eval(self):
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
-                      nb_env=1,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
@@ -224,7 +224,6 @@ def test_train_eval(self):
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
-                      nb_env=1,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
@@ -282,7 +281,6 @@ def test_train_eval(self):
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
-                      nb_env=1,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
diff --git a/l2rpn_baselines/utils/BaseDeepQ.py b/l2rpn_baselines/utils/BaseDeepQ.py
index d21c811..2e1d8d8 100644
--- a/l2rpn_baselines/utils/BaseDeepQ.py
+++ b/l2rpn_baselines/utils/BaseDeepQ.py
@@ -71,10 +71,12 @@ class BaseDeepQ(ABC):
     def __init__(self,
                  nn_params,
-                 training_param=None):
+                 training_param=None,
+                 verbose=False):
         self._action_size = nn_params.action_size
         self._observation_size = nn_params.observation_size
         self._nn_archi = nn_params
+        self.verbose = verbose
 
         if training_param is None:
             self._training_param = TrainingParam()
@@ -207,7 +209,8 @@ def load_network(self, path, name=None, ext="h5"):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore")
             self._target_model = load_model('{}.{}'.format(path_target_model, ext), custom_objects=self._custom_objects)
-        print("Succesfully loaded network.")
+        if self.verbose:
+            print("Successfully loaded network.")
 
     def target_train(self):
         """
diff --git a/l2rpn_baselines/utils/DeepQAgent.py b/l2rpn_baselines/utils/DeepQAgent.py
index 535fb81..f8334e0 100644
--- a/l2rpn_baselines/utils/DeepQAgent.py
+++ b/l2rpn_baselines/utils/DeepQAgent.py
@@ -353,7 +353,7 @@ def train(self,
 
         Parameters
         ----------
-        env: :class:`grid2op.Environment.Environment`
+        env: :class:`grid2op.Environment.Environment` or :class:`grid2op.Environment.MultiEnvironment`
            The environment used to train your model.
 
        iterations: ``int``
diff --git a/l2rpn_baselines/utils/TrainingParam.py b/l2rpn_baselines/utils/TrainingParam.py
index c186220..df46c5e 100644
--- a/l2rpn_baselines/utils/TrainingParam.py
+++ b/l2rpn_baselines/utils/TrainingParam.py
@@ -189,34 +189,28 @@ def __init__(self,
 
     @property
     def final_epsilon(self):
-        """return the final epsilon allowed by this instance"""
         return self._final_epsilon
 
     @final_epsilon.setter
     def final_epsilon(self, final_epsilon):
-        """used to update the final_epsilon"""
         self._final_epsilon = final_epsilon
         self._compute_exp_facto()
 
     @property
     def initial_epsilon(self):
-        """get the intial epsilon used for epsilon greedy"""
         return self._initial_epsilon
 
     @initial_epsilon.setter
     def initial_epsilon(self, initial_epsilon):
-        """used to update the initial_epsilon attribute"""
         self._initial_epsilon = initial_epsilon
         self._compute_exp_facto()
 
     @property
     def update_nb_iter(self):
-        """update the total number of iteration you want to make"""
         return self._update_nb_iter
 
     @update_nb_iter.setter
     def update_nb_iter(self, update_nb_iter):
-        """update the total number of iteration you want to make"""
         self._update_nb_iter = update_nb_iter
         if self._update_nb_iter is not None and self._update_nb_iter > 0:
             self._1_update_nb_iter = 1.0 / self._update_nb_iter
diff --git a/l2rpn_baselines/utils/make_multi_env.py b/l2rpn_baselines/utils/make_multi_env.py
index f157277..6222f28 100644
--- a/l2rpn_baselines/utils/make_multi_env.py
+++ b/l2rpn_baselines/utils/make_multi_env.py
@@ -7,7 +7,15 @@
 # This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions.
 
 import warnings
-from grid2op.Environment import MultiEnvironment, Environment
+from grid2op.Environment import Environment
+try:
+    from grid2op.Environment import MultiEnvironment
+except ImportError:
+    # the name changes as of grid2op >= 1.0.0
+    try:
+        from grid2op.Environment import MultiEnvMultiProcess as MultiEnvironment
+    except ImportError as exc:
+        raise exc
 
 
 def make_multi_env(env_init, nb_env):
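
After this patch, the number of parallel environments is no longer passed to train() through nb_env: parallelism is carried by the env argument itself, built with make_multi_env. Below is a minimal sketch of a training script against the patched API; the grid2op environment name and the paths are placeholders, and kwargs_converters / kwargs_archi are left empty because their full values depend on the baseline (see the docstring examples in the hunks above).

    import grid2op
    from l2rpn_baselines.utils import TrainingParam, make_multi_env
    from l2rpn_baselines.DeepQSimple import train

    # placeholder environment name: use whichever grid2op environment you train on
    env_init = grid2op.make("rte_case14_realistic")
    # with this patch, parallelism comes from the env object, not an nb_env argument
    env = make_multi_env(env_init, nb_env=4)

    tp = TrainingParam()
    try:
        train(env,
              name="AnneOnymous",
              iterations=10000,
              save_path="/WHERE/I/SAVED/THE/MODEL",
              load_path=None,
              logs_dir="/WHERE/I/SAVED/THE/LOGS",
              training_param=tp,
              kwargs_converters={},  # elided: see the docstring examples above
              kwargs_archi={})       # elided: see the docstring examples above
    finally:
        env_init.close()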
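
The TrainingParam setters shown above re-run _compute_exp_facto() whenever initial_epsilon or final_epsilon changes, i.e. the exponential decay factor of the epsilon-greedy schedule is cached and kept in sync with its endpoints. The actual formula is not part of this patch; the following is an illustrative, self-contained re-creation of that structure with one plausible decay rule, not the library's code.

    import numpy as np

    class EpsilonSchedule:
        """Illustrative stand-in, not the actual TrainingParam class."""
        def __init__(self, initial_epsilon=0.4, final_epsilon=1e-4, nb_iter=10000):
            self._initial_epsilon = initial_epsilon
            self._final_epsilon = final_epsilon
            self._nb_iter = nb_iter
            self._compute_exp_facto()

        def _compute_exp_facto(self):
            # cache a decay rate such that epsilon(nb_iter) == final_epsilon exactly
            self._exp_facto = np.log(self._initial_epsilon / self._final_epsilon) / self._nb_iter

        @property
        def final_epsilon(self):
            return self._final_epsilon

        @final_epsilon.setter
        def final_epsilon(self, value):
            # mirror the patch: changing an endpoint refreshes the cached factor
            self._final_epsilon = value
            self._compute_exp_facto()

        def epsilon(self, step):
            # exponential decay from initial_epsilon towards final_epsilon
            return max(self._final_epsilon,
                       self._initial_epsilon * np.exp(-step * self._exp_facto))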