diff --git a/doc_build.sh b/doc_build.sh index b3ca561e..7b384ac0 100755 --- a/doc_build.sh +++ b/doc_build.sh @@ -5,7 +5,7 @@ rm -rf build # create html pages sphinx-build -b html source build -make html +#make html # open web browser(s) to master table of content if which firefox diff --git a/docs/source/utils.rst b/docs/source/api_reference/1_utils.rst similarity index 100% rename from docs/source/utils.rst rename to docs/source/api_reference/1_utils.rst diff --git a/docs/source/problems.rst b/docs/source/api_reference/2_problems.rst similarity index 100% rename from docs/source/problems.rst rename to docs/source/api_reference/2_problems.rst diff --git a/docs/source/models.rst b/docs/source/api_reference/3_models.rst similarity index 100% rename from docs/source/models.rst rename to docs/source/api_reference/3_models.rst diff --git a/docs/source/workers.rst b/docs/source/api_reference/4_workers.rst similarity index 100% rename from docs/source/workers.rst rename to docs/source/api_reference/4_workers.rst diff --git a/docs/source/grid_workers.rst b/docs/source/api_reference/5_grid_workers.rst similarity index 100% rename from docs/source/grid_workers.rst rename to docs/source/api_reference/5_grid_workers.rst diff --git a/docs/source/helpers.rst b/docs/source/api_reference/6_helpers.rst similarity index 100% rename from docs/source/helpers.rst rename to docs/source/api_reference/6_helpers.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index 073d1d87..81871ae2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -150,7 +150,7 @@ def __getattr__(cls, name): # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +#html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names to template names. # The default sidebars (for documents that don't match any pattern) are @@ -216,8 +216,8 @@ def __getattr__(cls, name): 'torchvision': ('https://pytorch.org/docs/stable/', None), 'python': ('https://docs.python.org/3', None), 'yaml': ('https://yaml.readthedocs.io/en/latest/', None), - 'numpy': ('https://numpy.readthedocs.io/en/latest/', None) -} + 'numpy': ('https://numpy.readthedocs.io/en/latest/', None), + 'matplotlib': ('https://matplotlib.org/', None)} # -- Options for Texinfo output ---------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index a9d97bc4..100f27d4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -17,6 +17,13 @@ MI Prometheus is an open source Python library, built using PyTorch, that enable notes/* +.. toctree:: + :glob: + :maxdepth: 1 + :caption: MI-Prometheus Primer + + mip_primer/* + .. toctree:: :glob: :maxdepth: 1 @@ -26,15 +33,11 @@ MI Prometheus is an open source Python library, built using PyTorch, that enable .. 
toctree::
+   :glob:
    :maxdepth: 1
    :caption: Package Reference

-   workers
-   grid_workers
-   helpers
-   models
-   problems
-   utils
+   api_reference/*


 Indices and tables
diff --git a/docs/source/notes/2_mip_explained.rst b/docs/source/mip_primer/1_mip_explained.rst
similarity index 88%
rename from docs/source/notes/2_mip_explained.rst
rename to docs/source/mip_primer/1_mip_explained.rst
index d247cc7e..7885ed61 100644
--- a/docs/source/notes/2_mip_explained.rst
+++ b/docs/source/mip_primer/1_mip_explained.rst
@@ -1,5 +1,5 @@
 MI-Prometheus Explained
-================================
+=======================
 `@author: Tomasz Kornuta & Vincent Marois`

 This page dives deep into MI-Prometheus and its inner workings.
@@ -16,8 +16,16 @@ When training a model, people write programs which typically follow a similar pa

     - Updating the model parameters using an optimizer.

-During each iteration, the program also needs to collect some statistics (such as the
-training / validation loss & accuracy) and save the weights of the resulting model into a file.
+During each iteration, the program can also collect some statistics (such as the
+training / validation loss & accuracy) and (optionally) save the weights of the resulting model into a file.
+
+
+.. figure:: ../img/core_concepts.png
+   :scale: 50 %
+   :alt: The 5 core concepts of Mi-Prometheus
+   :align: center
+
+   The 5 core concepts of Mi-Prometheus. Dotted elements indicate optional inputs/outputs/dataflows.

 This typical workflow led us to the formalization of the core concepts of the framework:

@@ -29,30 +37,35 @@ This typical workflow led us to the formalization of the core concepts of the fr

     - **Experiment**: a single run (training & validation or test) of a given Model on a given Problem, using a specific Worker and Configuration file(s).

-.. figure:: ../img/core_concepts.png
-   :scale: 50 %
-   :alt: The 5 core concepts of Mi-Prometheus
-   :align: center
+Aside from Workers, MI-Prometheus currently offers two types of specialized applications, namely:

-   The 5 core concepts of Mi-Prometheus. Dotted elements indicate optional inputs/outputs/dataflows.
+    - **Grid Worker**: a specialized application automating the spawning of a number (grid) of experiments.
+    - **Helper**: an application useful for running experiments, but independent/external to the Workers.
+
+The general idea is that Grid Workers are useful for reproducible research, e.g. when one has to train a set of independent models on a set of problems and
+compare the results.
+In such a case, the user can use Helpers e.g. to download the required datasets (in advance, before training) and/or preprocess them in a specific way
+(e.g. extract features from all images in a dataset once, with a pretrained CNN model), which will reduce the overall time of all experiments.

 Architecture
 ---------------

 From an architectural point of view, MI-Prometheus can be seen as four stacked layers of interconnected modules.

-    - The lowest layer is formed by the external libraries that MI-Prometheus relies on, primarily PyTorch, NumPy and CUDA. Additionally, our basic workers rely on TensorBoardX, enabling the export of collected statistics, models and their parameters (weights, gradients) to TensorBoard. Optionally, some models and problems might depend on other external libraries. For instance, the framework currently incorporates problems and models from PyTorch’s wrapper to the TorchVision package.
-    - The second layer includes all the utilities that we have developed internally, such as the Parameter Registry (a singleton offering access to the registry of parameters), the Application State (another singleton representing the current state of application, e.g. whether the computations should be done on GPUs or not), factories used by the workers for instantiating the problem and model classes (indicated by the configuration file and loaded from the corresponding file). Additionally, this layer contains several tools, which are useful during an experiment run, such as logging facilities or statistics collectors (accessible by both the Problem and the Model).
-    - Next, the Components layer contains the models, problems and workers, i.e. the three major components required for the execution of one experiment. The problem and model classes are organized following specific hierarchies, using inheritance to facilitate their further extensions.
-    - Finally, the Experiment layer includes the configuration files, along with all the required inputs (such as the files containing the dataset, the files containing the saved model checkpoints with the weights to be loaded etc.) and outputs (logs from the experiment run, CSV files gathering the collected statistics, files containing the checkpoints of the best obtained model).
-
-
 .. figure:: ../img/layers.png
    :scale: 50 %
    :alt: Mi-Prometheus is constituted of 4 main inter-connected layers.
    :align: center

-   From an architectural point of view, MI-Prometheus can be seen as four stacked layers of interconnected modules.
+   Architecture of the MI-Prometheus framework.
+
+
+The layers are as follows:
+
+    - The lowest layer is formed by the external libraries that MI-Prometheus relies on, primarily PyTorch, NumPy and CUDA. Additionally, our basic workers rely on TensorBoardX, enabling the export of collected statistics, models and their parameters (weights, gradients) to TensorBoard. Optionally, some models and problems might depend on other external libraries. For instance, the framework currently incorporates problems and models from PyTorch’s wrapper to the TorchVision package.
+    - The second layer includes all the utilities that we have developed internally, such as the Parameter Registry (a singleton offering access to the registry of parameters), the Application State (another singleton representing the current state of the application, e.g. whether the computations should be done on GPUs or not), and factories used by the workers for instantiating the problem and model classes (indicated by the configuration file and loaded from the corresponding file). Additionally, this layer contains several tools which are useful during an experiment run, such as logging facilities or statistics collectors (accessible by both the Problem and the Model).
+    - Next, the Components layer contains the models, problems and workers, i.e. the three major components required for the execution of one experiment. The problem and model classes are organized following specific hierarchies, using inheritance to facilitate their further extension.
+    - Finally, the Experiment layer includes the configuration files, along with all the required inputs (such as the files containing the dataset, the files containing the saved model checkpoints with the weights to be loaded etc.) and outputs (logs from the experiment run, CSV files gathering the collected statistics, files containing the checkpoints of the best obtained model).
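+
+As a rough illustration of the second (utilities) layer, the sketch below touches the two
+singletons mentioned above. This is a minimal sketch: the import paths and the
+``add_config_params`` call follow the package's utilities, while the parameter key itself
+is purely illustrative, not a confirmed configuration schema.
+
+.. code-block:: python
+
+    from miprometheus.utils.param_interface import ParamInterface
+    from miprometheus.utils.app_state import AppState
+
+    params = ParamInterface()
+    # Writes into the (singleton) Parameter Registry; 'batch_size' is an example key.
+    params.add_config_params({'batch_size': 64})
+
+    app_state = AppState()        # singleton holding the current application state
+    print(app_state.visualize)    # e.g. whether visualization is currently active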
.. See http://docutils.sourceforge.net/docs/ref/rst/directives.html for a breakdown of the options
diff --git a/docs/source/notes/5_problems_explained.rst b/docs/source/mip_primer/2_problems_explained.rst
similarity index 100%
rename from docs/source/notes/5_problems_explained.rst
rename to docs/source/mip_primer/2_problems_explained.rst
diff --git a/docs/source/notes/4_models_explained.rst b/docs/source/mip_primer/3_models_explained.rst
similarity index 100%
rename from docs/source/notes/4_models_explained.rst
rename to docs/source/mip_primer/3_models_explained.rst
diff --git a/docs/source/mip_primer/4_workers_explained.rst b/docs/source/mip_primer/4_workers_explained.rst
new file mode 100644
index 00000000..cce07ca9
--- /dev/null
+++ b/docs/source/mip_primer/4_workers_explained.rst
@@ -0,0 +1,88 @@
+Workers Explained
+=================
+`@author: Tomasz Kornuta & Vincent Marois`
+
+The Workers are scripts which execute a certain task given a Model and a Problem.
+They are related to either the training (Trainers) or the testing procedure (Tester) and can support both CPUs & GPUs.
+
+.. figure:: ../img/worker_basic_class_diagram.png
+   :scale: 50 %
+   :alt: Class diagram of the workers.
+   :align: center
+
+   The class inheritance of the workers. The Trainers & the Tester classes inherit from a Worker class, to follow OOP best practices.
+
+Trainers
+^^^^^^^^
+
+There are two types of Trainers: the **Online Trainer** and the **Offline Trainer**.
+
+The **Offline Trainer** is based on epochs and validates the model on the validation set at the end of each epoch. Thus, it is well-suited for finite-size datasets, such as MNIST.
+
+While an epoch seems natural for all finite-size datasets, it makes less sense for problems which have a very large, almost infinite dataset (like algorithmic tasks, which generate data `on-the-fly`).
+This is why we also developed the **Online Trainer**, which, instead of looping on epochs, iterates directly on episodes (we call an iteration on a single batch an episode).
+
+By default, the **Online Trainer** validates the model every `n` episodes on a subset of the validation set, whereas the **Offline Trainer** validates the model on the whole validation set at the end of every epoch.
+The Offline Trainer can also validate the model every `n` episodes on a subset of the validation set (we refer to this as partial validation), and both trainers validate the model on the whole validation set at the end of training.
+
+Tester
+^^^^^^
+
+The third Worker is the **Tester**, which loads a trained model and iterates over the test set once, collecting all the specified statistics (mean loss, accuracy etc.).
+
+Both the Trainers and the **Tester** share a similar logic of operation. They both also support CPU and GPU working modes.
+The user can activate this by passing the ``--gpu`` argument when running a given worker from the command line, which will result in moving the tensors to the GPU (e.g. `torch.FloatTensor` to `torch.cuda.FloatTensor`), thus allowing the Model to use CUDA and perform its computations on the GPU.
+
+
+We can distinguish two main phases of functioning for the workers: the initialization and the iteration over the batches of samples (each such iteration on a single batch is called an Episode) produced by the Problem.
+
+Initialization:
+^^^^^^^^^^^^^^^
+
+.. figure:: ../img/initialization_sequence_diagram.png
+   :scale: 50 %
+   :alt: The most important interactions between Worker, Model & Problem during the initialization phase.
+   :align: center
+
+   The most important interactions between Worker, Model & Problem during the initialization phase.
+
+
+After loading the configuration file(s) in the Parameter Registry, the worker initializes the logger, creates an output experiment folder and output CSV files, exports the current experiment settings (content of the Parameter Registry) to a file and (optionally) initializes a TensorBoard logger.
+
+Next, it instantiates the problem and model classes using specialized factories. At that point, the Tester also loads the model weights from the checkpoint file indicated by one of the command line arguments (which is optional for the Trainers).
+
+In order to ensure that the Problem and the Model are compatible, both basic workers perform an automated handshake, checking whether the definitions (i.e. name, type and shape when relevant) of the inputs produced by the Problem match the required definitions of the Model inputs.
+They also verify that the definitions of the Model's predictions match the definitions of the Problem targets and are compatible with the used loss function.
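+
+For reference, the data definitions compared during this handshake are plain dictionaries
+mapping each data item to its expected size and type. The sketch below reuses (in trimmed
+form) example entries that appear in the framework's own docstrings; ``-1`` marks an
+unimportant or unknown dimension, such as the batch size:
+
+.. code-block:: python
+
+    import numpy as np
+    import torch
+
+    # Each key names a data item; 'size' lists its dimensions, 'type' its accepted types.
+    data_definitions = {'img': {'size': [-1, 320, 480, 3], 'type': [np.ndarray]},
+                        'question': {'size': [-1, -1], 'type': [torch.Tensor]},
+                        'question_length': {'size': [-1], 'type': [list, int]},
+                        'targets': {'size': [-1], 'type': [torch.Tensor]}}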
+
+
+Iterations over the batches of samples:
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. figure:: ../img/episode_sequence_diagram.png
+   :scale: 50 %
+   :alt: The interactions between the Worker, Problem and Model during a single episode, which are shared between the Trainer and the Tester.
+   :align: center
+
+   The interactions between the Worker, Problem and Model during a single episode, which are shared between the Trainer and the Tester.
+
+
+In every episode, the Worker retrieves a batch of samples from the Problem, inputs it to the Model, collects the Model's predictions and passes them back to the Problem in order to compute the loss (and other statistics, such as accuracy).
+
+At the end of the episode, all events and collected statistics are logged to the experiment folder & files.
+The Trainers perform several additional computations afterwards. First of all, they perform the model optimization, i.e. updating the model weights using error backpropagation and an optimizer (indicated in the configuration file). They also validate the Model, as explained above.
+
+If visualization is active, the Trainers also display the current behavior of the Model, through a visualization window specific to the Model.
+Finally, they also export the Model along with the collected statistics to a checkpoint file.
+
+Terminal conditions:
+^^^^^^^^^^^^^^^^^^^^
+
+Training ends when one of the following conditions is met:
+
+    - The epoch limit is reached (used by default by the **Offline Trainer**),
+    - The episode limit is reached (used by default by the **Online Trainer**),
+    - The validation loss goes below a certain threshold. Depending on the Trainer, we consider:
+
+        + the average loss over the entire validation set, calculated at the end of every epoch, for the **Offline Trainer**,
+        + the partial validation loss (loss on a single batch), calculated every *partial iteration interval*, for the **Online Trainer**.
+
+It is worth mentioning that both trainers can use both limits -- the user simply has to set the appropriate parameters in a configuration file.
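+
+As a minimal sketch of what setting such parameters could look like programmatically
+(in practice they live in the experiment's ``.yaml`` configuration file), the snippet
+below uses the framework's ``ParamInterface``; the key names and values are illustrative
+assumptions, not a confirmed configuration schema:
+
+.. code-block:: python
+
+    from miprometheus.utils.param_interface import ParamInterface
+
+    params = ParamInterface()
+    # Hypothetical 'training' section combining both limits with a loss threshold.
+    params.add_config_params({'training': {'terminal_conditions': {
+        'epoch_limit': 10,        # default stop criterion of the Offline Trainer
+        'episode_limit': 10000,   # default stop criterion of the Online Trainer
+        'loss_stop': 1e-2}}})     # stop once the validation loss drops below this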
diff --git a/docs/source/mip_primer/5_grid_workers.rst b/docs/source/mip_primer/5_grid_workers.rst
new file mode 100644
index 00000000..6e979cea
--- /dev/null
+++ b/docs/source/mip_primer/5_grid_workers.rst
@@ -0,0 +1,30 @@
+
+Grid Workers Explained
+======================
+`@author: Tomasz Kornuta & Vincent Marois`
+
+There are five Grid Workers, i.e. scripts which manage sets of experiments on grids of CPUs/GPUs.
+These are:
+
+    - two Grid Trainers (separate versions for collections of CPUs and GPUs), spawning several trainings in parallel,
+    - two Grid Testers (similarly),
+    - a single Grid Analyzer, which collects the results of several trainings & tests in a given experiment directory into a single CSV file.
+
+
+.. figure:: ../img/worker_grid_class_diagram.png
+   :scale: 50 %
+   :alt: Class diagram of the grid workers.
+   :align: center
+
+   The class inheritance of the grid workers. The Grid Trainers & Grid Testers classes inherit from a base Grid Worker class, to follow OOP best practices.
+
+
+The Grid Trainers and Testers in fact spawn several instances of the base Trainers and Testers respectively.
+The CPU & GPU versions execute different operations, i.e. the CPU grid workers assign one processor to each child process, whereas the GPU ones assign a single GPU instead.
+
+The grid trainer configuration files contain a few key sections. The ``grid_tasks`` section defines the grid of experiments that need to be executed, reusing the mechanism of default configuration nesting.
+Additionally, in ``grid_settings``, the user needs to define the number of repetitions of each experiment, as well as the maximum number of authorized concurrent runs (which later on will be compared to the number of available CPUs/GPUs).
+Optionally, the user might overwrite some parameters of a given experiment (in the ``overwrite`` section) or of all experiments at once (``grid_overwrite``).
+
+As a result of running these Grid Trainers and Testers, the user ends up with an experiment directory containing several models and statistics collected during several training, validation and test repetitions.
+The role of the last script, the Grid Analyzer, is to iterate through those directories, collecting all statistics and merging them into a single file, which facilitates further analysis of the results, comparison of the models' performance, etc.
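+
+A sketch of the overall shape of such a grid configuration, expressed here as the
+equivalent Python dictionary (all key names, paths and values below are illustrative
+assumptions rather than the framework's confirmed schema):
+
+.. code-block:: python
+
+    # Hypothetical grid configuration mirroring the sections described above.
+    grid_config = {
+        'grid_settings': {'experiment_repetitions': 5,   # runs per experiment
+                          'max_concurrent_runs': 4},     # compared to available CPUs/GPUs
+        'grid_overwrite': {'training': {'seed_numpy': 42}},  # applied to all experiments
+        'grid_tasks': [{'default_configs': 'mnist/default_mnist.yaml',
+                        'overwrite': {'training': {'problem': {'batch_size': 128}}}}],
+    }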
diff --git a/docs/source/mip_primer/6_helpers.rst b/docs/source/mip_primer/6_helpers.rst
new file mode 100644
index 00000000..bd14a211
--- /dev/null
+++ b/docs/source/mip_primer/6_helpers.rst
@@ -0,0 +1,11 @@
+Helpers Explained
+=================
+`@author: Tomasz Kornuta & Vincent Marois`
+
+A Helper is an application useful for running experiments, but independent/external to the Workers.
+Currently, MI-Prometheus offers two types of helpers:
+
+    - the **Problem Initializer**, responsible for initializing a problem (i.e. downloading the required data from the Internet or generating all samples) in advance, before the real experiment starts.
+    - the **Index Splitter**, responsible for generating files with indices that split a given dataset (in fact, its set of indices) in two. The resulting files can later be used for training/validation/testing when using ``SubsetRandomSampler``.
+
+We expect this list to grow soon.
diff --git a/docs/source/notes/1_installation.rst b/docs/source/notes/1_installation.rst
index 14f77068..58069bc4 100644
--- a/docs/source/notes/1_installation.rst
+++ b/docs/source/notes/1_installation.rst
@@ -22,7 +22,7 @@ If you plan to develop and introduce changes, please call the following command

     python setup.py develop

-This will enable you to change the code of the existing problems/models/workers and still be able to run them by calling the associated 'mip-*' commands.
+This will enable you to change the code of the existing problems/models/workers and still be able to run them by calling the associated ``mip-*`` commands.
+More on this subject can be found in the following blog post: dev_mode_.

 .. _guide: https://github.com/pytorch/pytorch#installation
diff --git a/docs/source/notes/6_updating_doc.rst b/docs/source/notes/2_updating_doc.rst
similarity index 100%
rename from docs/source/notes/6_updating_doc.rst
rename to docs/source/notes/2_updating_doc.rst
diff --git a/docs/source/notes/3_workers_explained.rst b/docs/source/notes/3_workers_explained.rst
deleted file mode 100644
index eba19404..00000000
--- a/docs/source/notes/3_workers_explained.rst
+++ /dev/null
@@ -1,111 +0,0 @@
-Workers Explained
-===================
-`@author: Tomasz Kornuta & Vincent Marois`
-
-
-Our framework currently offers eight different types of Workers, divided into two classes: Base Workers and Grid Workers.
-
-Base Workers
---------------
-
-The Base Workers are scripts which execute a certain task given a Model and a Problem.
-They are related to either the training (Trainers) or the testing procedure (Tester) and can support both CPUs & GPUs.
-
-.. figure:: ../img/worker_basic_class_diagram.png
-   :scale: 50 %
-   :alt: Class diagram of the base workers.
-   :align: center
-
-   The class inheritance of the base workers. The Trainers & the Tester classes inherit from a base Worker class, to follow OOP best practices.
-
-Trainers
-^^^^^^^^^^
-
-There are two types of Trainers: Online Trainer and Offline Trainer.
-
-The Offline Trainer is based on epochs and validates the model on the validation set at the end of each epoch. Thus, it is well-suited for finite-size datases, such as MNIST.
-
-While an epoch seems natural for all finite-size datasets, it makes less sense for problems which have a very large, almost infinite dataset (like algorithmic tasks, which generate data `on-the-fly`).
-This is why we also developed the Online Trainer, which, instead of looping on epochs, iterates directly on episodes (we call an iteration on a single batch an episode).
-
-By default, the Online Trainer validates the model every `n` episodes on a subset of the validation set, whereas Offline Trainer validates the model on the whole validation set at the end of every epoch.
-The Offline Trainer can also validates the model every `n` episodes on a subset of the validation set (we refer to this as partial validation), and both trainers validate the model on the whole validation set at the end of training.
-
-Tester
-^^^^^^^^^^
-
-The third Base Worker is Tester, which loads a trained model and iterates over the test set once, collecting all the specified statistics (mean loss, accuracy etc.).
-
-Both the Trainers and the Tester share a similar logic of operation. They both also support CPU and GPU working modes.
-The user can activate this by passing the `––gpu` argument when running a given worker from the command line, which will result in moving the tensors to GPU (e.g. `torch.FloatTensor` to `torch.cuda.FloatTensor`), thus allowing the Model to use CUDA and perform its computations on GPU.
-
-
-We can distinguish two main phases of functioning for the base workers: the initialization and the iteration over the batches of samples (each such iteration on a single batch is called an Episode) produced by the model.
- -Initialization: - -After loading the configuration file(s) in the Parameter Registry, the worker initializes the logger, creates an output experiment folder and output CSV files, exports the current experiment settings (content of the Parameter Registry) to a file and (optionally) initializes a TensorBoard logger. - -Next, it instantiates the problem and model classes using specialized factories. At that point, the Tester also loads the model weights from the checkpoints file indicated by one of the command line arguments (which is optional for the Trainers). - -In order to ensure that the Problem and the Model are compatible, both basic workers perform an automated handshaking, to check whether the definitions (i.e. name, type and shape when relevant) of the inputs produced by the Problem match the required definitions of the Model inputs. -They also verify if the definitions of the model’s predictions match the definitions of the Problem targets and are compatible with the used loss function. - -.. figure:: ../img/initialization_sequence_diagram.png - :scale: 50 % - :alt: The most important interactions between Worker, Model & Problem during the initialization phase. - :align: center - - The most important interactions between Worker, Model & Problem during the initialization phase. - - -Iterations over the batches of samples: - -In every episode, the Worker retrieves a batch of samples from the Problem, inputs it to the Model, collects the Model’s predictions and passes them back to the Problem in order to compute the loss (and other statistics, such as accuracy). - -At the end of the episode, all events and collected statistics are logged to the experiment folder & files. -The Trainers performs several additional computations afterwards. First of all, they perform the model optimization, i.e. updating the model weights using error backpropagation and an optimizer (indicated in the configuration file). They also validate the Model, as explained above. - -If visualization is active, the Trainers also display the current behavior of the Model, through a visualization window specific to the Model. -Finally, they also export the Model along with the collected statistics to a checkpoint file. - -.. figure:: ../img/episode_sequence_diagram.png - :scale: 50 % - :alt: The interactions between the Worker, Problem and Model during a single episode, which are shared between the Trainer and the Tester. - :align: center - - The interactions between the Worker, Problem and Model during a single episode, which are shared between the Trainer and the Tester. - -Training ends when one of the following conditions is met: - - - The epoch limit is reached (for the Offline Trainer), - - The episode limit is reached (for the Online Trainer), - - The validation loss goes below a certain threshold. We consider the average loss over the entire validation set for the Offline Trainer, and the partial validation loss for the Online one. - - -Grid Workers --------------- - -Additionally, there are five Grid Workers, i.e. scripts which manage sets of experiments on grids of CPUs/GPUs. -These are: - - - two Grid Trainers (separate versions for collections of CPUs and GPUs) spanning several trainings in parallel, - - two Grid Testers (similarly), - - a single Grid Analyzer, which summarizes the results of several trainings & tests into one csv file. - -The Grid Trainers and Testers in fact spawn several instances of base Trainers and Testers respectively. -The CPU & GPU versions execute different operations, i.e. 
the CPUs grid workers assign one processor for each child, whereas the GPUs ones assigns a single GPU instead. - -Fig. 7 presents the most important sections of the grid trainer configuration files. Section grid tasks defines the grid of experiments that need to be executed, reusing the mechanism of default configuration nesting. -Additionally, in grid settings, the user needs to define the number of repetitions of each experiment, as well as the maximum number of authorized concurrent runs (which later on will be compared to the number of available CPUs/GPUs). -Optionally, the user might overwrite some parameters of a given experiment (in the `overwrite` section) or all experiments at once (`grid_overwrite`). - -As a result of running these Grid Trainers and Testers, the user ends up with an experiment directory containing several models and statistics collected during several training, validation and test repetitions. -The role of the last script, Grid Analyzer, is to iterate through those directories, collecting all statistics and merging them into a single file that facilitates a further analysis of results, the comparison of the models performance, etc. - -.. figure:: ../img/worker_grid_class_diagram.png - :scale: 50 % - :alt: Class diagram of the grid workers. - :align: center - - The class inheritance of the grid workers. The Trainers & the Tester classes inherit from a base Worker class, to follow OOP best practices. \ No newline at end of file diff --git a/miprometheus/models/model.py b/miprometheus/models/model.py index f77916e5..4612c1cb 100644 --- a/miprometheus/models/model.py +++ b/miprometheus/models/model.py @@ -422,13 +422,10 @@ def load(self, checkpoint_file): def summarize(self): """ Summarizes the model by showing the trainable/non-trainable parameters and weights\ - per layer (``nn.Module``). + per layer ( ``nn.Module`` ). Uses ``recursive_summarize`` to iterate through the nested structure of the model (e.g. for RNNs). - :param: Model object for which the summary will be created. - :type ``models.model.Model`` (which inherits from ``nn.Module``). - :return: Summary as a str. """ @@ -507,7 +504,7 @@ def recursive_summarize(self, module_, indent_, module_name_): if __name__ == '__main__': - """Unit test for the handshake.""" + """Model unit test.""" from miprometheus.utils.param_interface import ParamInterface params = ParamInterface() @@ -524,16 +521,3 @@ def recursive_summarize(self, module_, indent_, module_name_): 'index': {'size': [-1], 'type': [list, int]}, 'imgfile': {'size': [-1,-1], 'type': [list,str]}, } - - problem_data_definitions = {'img': {'size': [-1, 320, 480, 3], 'type': [np.ndarray]}, - 'question': {'size': [-1, -1], 'type': [torch.Tensor]}, - 'question_length': {'size': [-1], 'type': [list, int]}, - 'question_string': {'size': [-1,-1], 'type': [list, str]}, - 'question_type': {'size': [-1,-1], 'type': [list, str]}, - 'targets': {'size': [-1], 'type': [torch.Tensor]}, - 'targets_string': {'size': [-1,-1], 'type': [list, str]}, - 'index': {'size': [-1], 'type': [list, int]}, - 'imgfile': {'size': [-1,-1], 'type': [list,str]} - } - - model.handshake_definitions(problem_data_definitions) diff --git a/miprometheus/models/relational_net/functions.py b/miprometheus/models/relational_net/functions.py index 77c684fd..2834e80b 100644 --- a/miprometheus/models/relational_net/functions.py +++ b/miprometheus/models/relational_net/functions.py @@ -59,10 +59,10 @@ def forward(self, inputs): """ forward pass of the g_theta MLP. 
- :param inputs: tensor of shape [batch_size, *, input_size], should represent the pairs of regions (in the CNN \ + :param inputs: tensor of shape [batch_size, -1, input_size], should represent the pairs of regions (in the CNN \ feature maps) cat with the question encoding. - :return: tensor of shape [batch_size, *, 256]. + :return: tensor of shape [batch_size, -1, 256]. """ @@ -113,10 +113,10 @@ def forward(self, inputs): """ forward pass of the f_phi MLP. - :param inputs: tensor of shape [batch_size, *, 256], should represent the element-wise sum of the outputs of \ + :param inputs: tensor of shape [batch_size, -1, 256], should represent the element-wise sum of the outputs of \ g_theta. - :return: Predictions over the available classes, tensor of shape [batch_size, *, output_size] + :return: Predictions over the available classes, tensor of shape [batch_size, -1, output_size] """ diff --git a/miprometheus/models/relational_net/relational_network.py b/miprometheus/models/relational_net/relational_network.py index cb0af5a7..9205a841 100644 --- a/miprometheus/models/relational_net/relational_network.py +++ b/miprometheus/models/relational_net/relational_network.py @@ -135,7 +135,7 @@ def forward(self, data_dict): """ Runs the ``RelationalNetwork`` model. - :param data_dict: DataDict({'images', 'questions', **}) containing: + :param data_dict: DataDict({'images', 'questions', ...}) containing: - images [batch_size, num_channels, height, width], - questions [batch_size, question_size] diff --git a/miprometheus/models/thalnet/thalnet_model.py b/miprometheus/models/thalnet/thalnet_model.py index 5c7b5a40..a70bd20e 100644 --- a/miprometheus/models/thalnet/thalnet_model.py +++ b/miprometheus/models/thalnet/thalnet_model.py @@ -94,7 +94,7 @@ def forward(self, data_dict): # x : batch_size, seq_len, input_size """ Forward run of the ThalNetModel model. - :param data_dict: DataDict({'sequences', **}) where 'sequences' is of shape \ + :param data_dict: DataDict({'sequences', ...}) where 'sequences' is of shape \ [batch_size, sequence_length, input_size] :type data_dict: utils.DataDict @@ -205,7 +205,7 @@ def plot(self, data_dict, logits, sample=0): """ Plots specific information on the model's behavior. - :param data_dict: DataDict({'sequences', **}) + :param data_dict: DataDict({'sequences', ...}) :type data_dict: utils.DataDict :param logits: Predictions of the model @@ -214,7 +214,7 @@ def plot(self, data_dict, logits, sample=0): :param sample: Index of the sample to visualize. Default to 0. :type sample: int - :return: ``True`` if the user pressed stop, else ``False``. + :returns: ``True`` if the user pressed stop, else ``False``. """ # Check if we are supposed to visualize at all. diff --git a/miprometheus/models/vqa_baselines/cnn_lstm/cnn_lstm.py b/miprometheus/models/vqa_baselines/cnn_lstm/cnn_lstm.py index eec6deed..3dc24cc7 100644 --- a/miprometheus/models/vqa_baselines/cnn_lstm/cnn_lstm.py +++ b/miprometheus/models/vqa_baselines/cnn_lstm/cnn_lstm.py @@ -153,7 +153,7 @@ def forward(self, data_dict): """ Runs the ``CNN_LSTM`` model. 
- :param data_dict: DataDict({'images', 'questions', **}) where: + :param data_dict: DataDict({'images', 'questions', ...}) where: - images: [batch_size, num_channels, height, width], - questions: [batch_size, size_question_encoding] @@ -198,7 +198,7 @@ def plot(self, data_dict, predictions, sample=0): :param data_dict: DataDict({'images', 'questions', 'targets'}) where: - - images: [batch_size, num_channels, height, width], + - images: [batch_size, num_channels, height, width] - questions: [batch_size, size_question_encoding] - targets: [batch_size] @@ -208,7 +208,7 @@ def plot(self, data_dict, predictions, sample=0): :type predictions: torch.tensor :param sample: Index of sample in batch (DEFAULT: 0). - :type sample:int + :type sample: int """ # Check if we are supposed to visualize at all. diff --git a/miprometheus/models/vqa_baselines/stacked_attention_networks/multi_hops_stacked_attention_model.py b/miprometheus/models/vqa_baselines/stacked_attention_networks/multi_hops_stacked_attention_model.py index 07198ea4..e06084f2 100644 --- a/miprometheus/models/vqa_baselines/stacked_attention_networks/multi_hops_stacked_attention_model.py +++ b/miprometheus/models/vqa_baselines/stacked_attention_networks/multi_hops_stacked_attention_model.py @@ -158,7 +158,7 @@ def forward(self, data_dict): """ Runs the ``MultiHopsStackedAttentionNetwork`` model. - :param data_dict: DataDict({'images', 'questions', **}) where: + :param data_dict: DataDict({'images', 'questions', ...}) where: - images: [batch_size, num_channels, height, width], - questions: [batch_size, size_question_encoding] @@ -221,7 +221,7 @@ def plot(self, data_dict, predictions, sample=0): :type predictions: torch.tensor :param sample: Index of sample in batch (DEFAULT: 0). - :type sample:int + :type sample: int """ # Check if we are supposed to visualize at all. if not self.app_state.visualize: diff --git a/miprometheus/models/vqa_baselines/stacked_attention_networks/stacked_attention_model.py b/miprometheus/models/vqa_baselines/stacked_attention_networks/stacked_attention_model.py index 06e95fee..02c07b5d 100644 --- a/miprometheus/models/vqa_baselines/stacked_attention_networks/stacked_attention_model.py +++ b/miprometheus/models/vqa_baselines/stacked_attention_networks/stacked_attention_model.py @@ -155,10 +155,11 @@ def forward(self, data_dict): """ Runs the ``StackedAttentionNetwork`` model. - :param data_dict: DataDict({'images', 'questions', **}) where: + :param data_dict: DataDict({'images', 'questions', ...}) where: - images: [batch_size, num_channels, height, width], - questions: [batch_size, size_question_encoding] + :type data_dict: utils.DataDict :returns: Predictions: [batch_size, output_classes] @@ -209,7 +210,7 @@ def plot(self, data_dict, predictions, sample=0): :type predictions: torch.tensor :param sample: Index of sample in batch (DEFAULT: 0). - :type sample:int + :type sample: int """ # Check if we are supposed to visualize at all. if not self.app_state.visualize: diff --git a/miprometheus/problems/image_to_class/cifar10.py b/miprometheus/problems/image_to_class/cifar10.py index 8a550a0e..715dbe6d 100644 --- a/miprometheus/problems/image_to_class/cifar10.py +++ b/miprometheus/problems/image_to_class/cifar10.py @@ -32,7 +32,7 @@ class CIFAR10(ImageToClassProblem): """ - Classic CIFAR10 classification problem. + Image classification problem using the CIFAR-10 dataset. 
Please see reference here: https://www.cs.toronto.edu/~kriz/cifar.html @@ -47,7 +47,7 @@ class CIFAR10(ImageToClassProblem): def __init__(self, params): """ - Initializes CIFAR10 problem: + Initializes the CIFAR-10 problem: - Calls ``problems.problem.ImageToClassProblem`` class constructor, - Sets following attributes using the provided ``params``: @@ -83,7 +83,7 @@ def __init__(self, params): >>> 'use_train_data': True} - :param params: Dictionary of parameters (read from configuration ``.yaml``file). + :param params: Dictionary of parameters (read from configuration ``.yaml`` file). :type params: miprometheus.utils.ParamInterface """ diff --git a/miprometheus/problems/problem.py b/miprometheus/problems/problem.py index 977d7b75..4b3ece99 100644 --- a/miprometheus/problems/problem.py +++ b/miprometheus/problems/problem.py @@ -145,112 +145,6 @@ def create_data_dict(self): return DataDict({key: None for key in self.data_definitions.keys()}) - def handshake_definitions(self, model_data_definitions_): - """ - Proceeds to the handshake between what the Model produces as predictions and what the Problem expects to compute\ - the loss - - .. note:: - - Handshaking is defined here as making sure that the ``Model`` and the ``Problem`` agree on the data that they - exchange. - More specifically, the ``Problem`` has a definition of the targets data that it expects\ - (through its ``self.data_definitions`` attribute). The ``Model`` has the same object describing what \ - it generates. - - This functions proceeds to the handshaking as: - - - Verifying that the key ``targets`` is present in ``Model.data_definitions`` (representing the logits)\ - and in ``Problem.data_definitions`` (representing the ground truth answers). - If not, an exception is thrown. - - - If this key exists, than this function checks that the shape and type specified in\ - ``Model.data_definitions`` are accepted by the loss function specified by the Problem. - Similarly, it also checks if the shape & type indicated for ``Problem.data_definitions_``\ - are accepted by its loss function. If not, an exception is thrown. - - - **If both steps above passed, than the Problem accepts what the Model generates as predictions.** - - - To properly define the ``data_definitions`` dicts, here are some examples: - - >>> data_definitions = {'img': {'size': [-1, 320, 480, 3], 'type': [np.ndarray]}, - >>> 'question': {'size': [-1, -1], 'type': [torch.Tensor]}, - >>> 'question_length': {'size': [-1], 'type': [list, int]}, - >>> # ... - >>> } - - Please indicate both the size and the type as ``lists``: - - - Indicate all dimensions in the correct order for each key `size` field. If a dimension is\ - unimportant or unknown (e.g. the batch size or variable-length sequences), then please indicate \ - ``-1`` at the correct location. Also indicate the corect number of dimensions. - - - both the ground truth targets and the logits should be ``torch.tensor``. - - - :param model_data_definitions_: Contains the definition of the logits generated by the ``Model`` class (among\ - other definitions). - :type model_data_definitions_: dict - - :return: True if the loss function can accept the logits and ground truth labels produced by the ``Model``\ - and ``Problem`` respectively, otherwise throws an exception. 
- - """ - - if 'targets' in self.data_definitions.keys() and 'targets' in model_data_definitions_.keys(): - - # check the type first, easier - if self.data_definitions['targets']['type'] == [torch.Tensor]\ - and model_data_definitions_['targets']['type'] == [torch.Tensor]: - - if type(self.loss_function).__name__ in ['L1Loss', 'MSELoss', 'PoissonNLLLoss', 'KLDivLoss', 'BCELoss', - 'BCEWithLogitsLoss', 'HingeEmbeddingLoss', - 'MultiLabelMarginLoss', 'SmoothL1Loss', 'SoftMarginLoss', - 'MultiLabelSoftMarginLoss', 'MaskedBCEWithLogitsLoss']: - - # these loss functions require the same shape for both the logits and ground truth labels - if len(self.data_definitions['targets']['size']) != len(model_data_definitions_['targets']['size']): - # the ground truth labels and the logits don't have the same number of dimensions - raise ValueError("Both the logits and ground truth labels don't have the same number of " - "dimensions. The specified loss function ({}) requires it.".format(self.loss_function)) - else: - # both have same number of dim, now check that the indicated dimensions are equal - # checking that even the -1 are in the same place. - for i, dim in enumerate(self.data_definitions['targets']['size']): - if dim != model_data_definitions_['targets']['size'][i]: - raise ValueError('The specified loss function ({}) require that the logits ' - 'and ground truth labels have the same shape. Got logits shape = {}' - ' and ground truth labels shape = {}.'.format(self.loss_function, - model_data_definitions_['targets']['size'], - self.data_definitions['targets']['size'])) - # these loss functions require that the ground truth labels have 1 less dimension - elif type(self.loss_function).__name__ in ['CrossEntropyLoss', 'NLLLoss', 'MarginRankingLoss', - 'MultiMarginLoss', 'MaskedCrossEntropyLoss']: - - if len(self.data_definitions['targets']['size']) != len(model_data_definitions_['targets']['size'])-1: - # the ground truth labels and the logits don't have the same number of dimensions - raise ValueError("The specified loss function ({}) requires that the ground truth labels" - " have one less dimension than the logits. Got logits shape = {}" - " and ground truth labels shape = {}.".format(self.loss_function, - model_data_definitions_['targets']['size'], - self.data_definitions['targets']['size'])) - # TODO: should also check that the order of dimension is coherent - - else: - self.logger.warning('The indicated loss function is {}, which requires more than 2 inputs.' - ' Not checking it for now.'.format(self.loss_function)) - - else: - raise ValueError("Either the logits or ground truth labels are not torch.Tensor. ") - - else: - raise KeyError("Couldn't find the key 'targets' in self.data_definitions or model_data_definitions_.") - - # Everything matches, return true - return True - def __len__(self): """ :return: The size of the dataset. 
@@ -544,7 +438,7 @@ def curriculum_learning_update_params(self, episode): if __name__ == '__main__': - """Unit test for DataDict & targets handshaking""" + """Unit test for Problem and DataDict""" from miprometheus.utils.param_interface import ParamInterface params = ParamInterface() @@ -562,13 +456,3 @@ def curriculum_learning_update_params(self, episode): #print(repr(datadict)) - model_data_definitions = {'question': {'size': [-1, -1], 'type': [torch.Tensor]}, - 'question_length': {'size': [-1], 'type': [list, int]}, - 'question_string': {'size': [-1, -1], 'type': [list, str]}, - 'question_type': {'size': [-1, -1], 'type': [list, str]}, - 'targets': {'size': [-1, -1], 'type': [torch.Tensor]}, - 'targets_string': {'size': [-1, -1], 'type': [list, str]}, - 'index': {'size': [-1], 'type': [list, int]}, - 'imgfile': {'size': [-1, -1], 'type': [list, str]}} - - problem.handshake_definitions(model_data_definitions_=model_data_definitions)