From 7861b93bb1c8bded890d32a7c9669a847a279162 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Mon, 20 Apr 2020 11:15:57 +0200
Subject: [PATCH 001/102] Early implementation of attention weight logging

---
 rasa/core/policies/ted_policy.py     |  1 +
 rasa/utils/tensorflow/transformer.py | 43 +++++++++++++++++++++++++++-
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index a1790d52d2e7..602e71c8ae68 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -547,6 +547,7 @@ def _prepare_layers(self) -> None:
             use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
             max_relative_position=self.config[MAX_RELATIVE_POSITION],
             name=DIALOGUE + "_encoder",
+            summary_writer=self.test_summary_writer,
         )
         self._tf_layers[f"embed.{DIALOGUE}"] = layers.Embed(
             self.config[EMBEDDING_DIMENSION],
diff --git a/rasa/utils/tensorflow/transformer.py b/rasa/utils/tensorflow/transformer.py
index 51bfe3094508..6475b5b247c8 100644
--- a/rasa/utils/tensorflow/transformer.py
+++ b/rasa/utils/tensorflow/transformer.py
@@ -4,6 +4,8 @@
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.keras import backend as K
 import numpy as np
+from tensorflow_core.python.ops.summary_ops_v2 import ResourceSummaryWriter
+
 from rasa.utils.tensorflow.layers import DenseWithSparseWeights
 
 
@@ -419,9 +421,12 @@ def __init__(
         use_value_relative_position: bool = False,
         max_relative_position: Optional[int] = None,
         heads_share_relative_embedding: bool = False,
+        summary_writer: Optional[ResourceSummaryWriter] = None,
     ) -> None:
         super().__init__()
 
+        self._summary_writer = summary_writer
+
         self._layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
         self._mha = MultiHeadAttention(
             units,
@@ -469,7 +474,9 @@ def call(
             training = K.learning_phase()
 
         x_norm = self._layer_norm(x)  # (batch_size, length, units)
-        attn_out, _ = self._mha(x_norm, x_norm, pad_mask=pad_mask, training=training)
+        attn_out, attn_weights = self._mha(
+            x_norm, x_norm, pad_mask=pad_mask, training=training
+        )
         attn_out = self._dropout(attn_out, training=training)
         x += attn_out
 
@@ -478,6 +485,38 @@ def call(
             ffn_out = layer(ffn_out, training=training)
         x += ffn_out
 
+        if self._summary_writer:
+            # attention_weights.shape == (batch_size, num_heads, length, length)
+
+            # for i_head in range(self._mha.num_heads):
+            #     # select attention weights of one head and reshape for image output
+            #     img = attn_weights[:, i_head, :, :]
+            #     img = tf.expand_dims(img, axis=-1)
+            #     # rescale values to range from 0.0 to 1.0
+            #     img = (img - tf.reduce_min(img)) / (
+            #         tf.reduce_max(img) - tf.reduce_min(img)
+            #     )
+            #     # write image to tensorboard
+            #     with self._summary_writer.as_default():
+            #         tf.summary.image(
+            #             f"attn_weights_h{i_head}", img, step=0, max_outputs=20
+            #         )
+
+            # reshape so we get all attention heads in one image
+            num_heads = self._mha.num_heads
+            length = tf.shape(x)[1]
+            img = tf.reshape(attn_weights, (-1, num_heads * length, length, 1))
+            img = tf.transpose(img, perm=[0, 2, 1, 3])
+            # rescale values to range from 0.0 to 1.0
+            img = (img - tf.reduce_min(img)) / (
+                tf.reduce_max(img) - tf.reduce_min(img)
+            )
+            # write image to tensorboard
+            with self._summary_writer.as_default():
+                tf.summary.image(
+                    f"attn_weights", img, step=0, max_outputs=20
+                )
+
         return x  # (batch_size, length, units)
 
 
@@ -524,6 +563,7 @@ def __init__(
         max_relative_position: Optional[int] = None,
         heads_share_relative_embedding: bool = False,
         name: Optional[Text] = None,
+        summary_writer: Optional[ResourceSummaryWriter] = None,
     ) -> None:
         super().__init__(name=name)
 
@@ -554,6 +594,7 @@ def __init__(
                 use_value_relative_position,
                 max_relative_position,
                 heads_share_relative_embedding,
+                summary_writer,
             )
             for _ in range(num_layers)
         ]

From 3364f341ebf15787c01af58eeaa38311935a91af Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Fri, 4 Sep 2020 14:05:26 +0200
Subject: [PATCH 002/102] Move tensorboard out of layer

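---
Notes (review):
    * rasa/nlu/classifiers/diet_classifier.py: the fallback
      `lambda x, mask, training: x, None` parses as the tuple
      `(lambda ..., None)`, so the stored "layer" is a tuple and the
      unpacking call in `_create_sequence` fails at call time. It needs
      parentheses: `lambda x, mask, training: (x, None)`.
    * rasa/core/policies/ted_policy.py: a debug `print("xxx...")` is left
      in `batch_predict`.
    * rasa/core/policies/ted_policy.py: `normalize_image` divides by
      `max - min`, which is zero when all attention weights are equal
      (e.g. a sequence of length 1), producing NaNs in the image.
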
 rasa/core/policies/ted_policy.py        | 46 ++++++++++++++++++---
 rasa/nlu/classifiers/diet_classifier.py |  4 +-
 rasa/utils/tensorflow/models.py         |  6 +--
 rasa/utils/tensorflow/transformer.py    | 53 +++++--------------------
 4 files changed, 55 insertions(+), 54 deletions(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 8ee33ec987b0..1e599bb4a940 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -553,7 +553,6 @@ def _prepare_layers(self) -> None:
             use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
             max_relative_position=self.config[MAX_RELATIVE_POSITION],
             name=DIALOGUE + "_encoder",
-            summary_writer=self.test_summary_writer,
         )
         self._tf_layers[f"embed.{DIALOGUE}"] = layers.Embed(
             self.config[EMBEDDING_DIMENSION],
@@ -574,7 +573,7 @@ def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]:
 
         return all_labels, all_labels_embed
 
-    def _emebed_dialogue(self, dialogue_in: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
+    def _emebed_dialogue(self, dialogue_in: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor]]:
         """Create dialogue level embedding and mask."""
 
         # mask different length sequences
@@ -582,7 +581,7 @@ def _emebed_dialogue(self, dialogue_in: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor
         mask = tf.sign(tf.reduce_max(dialogue_in, axis=-1) + 1)
 
         dialogue = self._tf_layers[f"ffnn.{DIALOGUE}"](dialogue_in, self._training)
-        dialogue_transformed = self._tf_layers["transformer"](
+        dialogue_transformed, attention_weights = self._tf_layers["transformer"](
             dialogue, 1 - tf.expand_dims(mask, axis=-1), self._training
         )
         dialogue_transformed = tfa.activations.gelu(dialogue_transformed)
@@ -594,7 +593,7 @@ def _emebed_dialogue(self, dialogue_in: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor
 
         dialogue_embed = self._tf_layers[f"embed.{DIALOGUE}"](dialogue_transformed)
 
-        return dialogue_embed, mask
+        return dialogue_embed, mask, attention_weights  # ToDo: ?, ?, (num_layers, batch_size, num_heads, length, length)
 
     def _embed_label(self, label_in: Union[tf.Tensor, np.ndarray]) -> tf.Tensor:
         label = self._tf_layers[f"ffnn.{LABEL}"](label_in, self._training)
@@ -614,7 +613,7 @@ def batch_loss(
 
         all_labels, all_labels_embed = self._create_all_labels_embed()
 
-        dialogue_embed, mask = self._emebed_dialogue(dialogue_in)
+        dialogue_embed, mask, _ = self._emebed_dialogue(dialogue_in)
         label_embed = self._embed_label(label_in)
 
         loss, acc = self._tf_layers[f"loss.{LABEL}"](
@@ -636,7 +635,7 @@ def batch_predict(
         if self.all_labels_embed is None:
             _, self.all_labels_embed = self._create_all_labels_embed()
 
-        dialogue_embed, mask = self._emebed_dialogue(dialogue_in)
+        dialogue_embed, mask, attention_weights = self._emebed_dialogue(dialogue_in)
 
         sim_all = self._tf_layers[f"loss.{LABEL}"].sim(
             dialogue_embed[:, :, tf.newaxis, :],
@@ -648,7 +647,42 @@ def batch_predict(
             sim_all, self.config[SIMILARITY_TYPE]
         )
 
+        if self.tensorboard_log_dir and len(batch_in) == 1:
+            # Log attention weights if we make a single prediction
+            print("xxxxxxxxxxxxxxxxxxxxxxxx")
+            if not self.test_summary_writer:
+                self._set_up_tensorboard_writer()
+            if self.test_summary_writer:
+                with self.test_summary_writer.as_default():
+                    tf.summary.image(
+                        "TED_attention_weights",
+                        image_from_attention_weights(attention_weights),
+                        step=0,
+                        max_outputs=9
+                    )
+
         return {"action_scores": scores}
 
 
+def image_from_attention_weights(attention_weights: tf.Tensor) -> tf.Tensor:
+    num_layers = tf.shape(attention_weights)[0]
+    batch_size = tf.shape(attention_weights)[1]
+    num_heads = tf.shape(attention_weights)[2]
+    length = tf.shape(attention_weights)[3]
+
+    img = attention_weights
+    img = tf.transpose(img, perm=[1, 0, 2, 3, 4])
+    img = tf.reshape(img, (batch_size * num_layers, length, num_heads * length, 1))
+    # img = tf.transpose(img, perm=[0, 2, 1, 3])
+    # rescale values to range from 0.0 to 1.0
+    img = normalize_image(img)
+    return img
+
+
+def normalize_image(image: tf.Tensor) -> tf.Tensor:
+    return (image - tf.reduce_min(image)) / (
+        tf.reduce_max(image) - tf.reduce_min(image)
+    )
+
+
 # pytype: enable=key-error
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 07b8be6aa1ad..037c839b67b4 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -1432,7 +1432,7 @@ def _prepare_sequence_layers(self, name: Text) -> None:
             )
         else:
             # create lambda so that it can be used later without the check
-            self._tf_layers[f"{name}_transformer"] = lambda x, mask, training: x
+            self._tf_layers[f"{name}_transformer"] = lambda x, mask, training: x, None  # ToDo: is it ok to return None for attention_weights as second argument?
 
     def _prepare_mask_lm_layers(self, name: Text) -> None:
         self._tf_layers[f"{name}_input_mask"] = layers.InputMask()
@@ -1658,7 +1658,7 @@ def _create_sequence(
             transformer_inputs = inputs
             lm_mask_bool = None
 
-        outputs = self._tf_layers[f"{name}_transformer"](
+        outputs, attention_weights = self._tf_layers[f"{name}_transformer"](
             transformer_inputs, 1 - mask, self._training
         )
 
diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py
index 982485a0cbbd..54360a9e61ca 100644
--- a/rasa/utils/tensorflow/models.py
+++ b/rasa/utils/tensorflow/models.py
@@ -408,9 +408,9 @@ def batch_to_model_data_format(
         """Convert input batch tensors into batch data format.
 
         Batch contains any number of batch data. The order is equal to the
-        key-value pairs in session data. As sparse data were converted into indices,
-        data, shape before, this methods converts them into sparse tensors. Dense data
-        is kept.
+        key-value pairs in session data. As sparse data were converted into (indices,
+        data, shape before), this method converts them into sparse tensors. Dense 
+        data is kept.
         """
 
         batch_data = defaultdict(list)
diff --git a/rasa/utils/tensorflow/transformer.py b/rasa/utils/tensorflow/transformer.py
index 6a24fa7b7512..78998233f904 100644
--- a/rasa/utils/tensorflow/transformer.py
+++ b/rasa/utils/tensorflow/transformer.py
@@ -4,7 +4,6 @@
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.keras import backend as K
 import numpy as np
-from tensorflow_core.python.ops.summary_ops_v2 import ResourceSummaryWriter
 
 from rasa.utils.tensorflow.layers import DenseWithSparseWeights
 
@@ -421,12 +420,9 @@ def __init__(
         use_value_relative_position: bool = False,
         max_relative_position: Optional[int] = None,
         heads_share_relative_embedding: bool = False,
-        summary_writer: Optional[ResourceSummaryWriter] = None,
     ) -> None:
         super().__init__()
 
-        self._summary_writer = summary_writer
-
         self._layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
         self._mha = MultiHeadAttention(
             units,
@@ -458,7 +454,7 @@ def call(
         x: tf.Tensor,
         pad_mask: Optional[tf.Tensor] = None,
         training: Optional[Union[tf.Tensor, bool]] = None,
-    ) -> tf.Tensor:
+    ) -> Tuple[tf.Tensor, tf.Tensor]:
         """Apply transformer encoder layer.
 
         Arguments:
@@ -485,39 +481,7 @@ def call(
             ffn_out = layer(ffn_out, training=training)
         x += ffn_out
 
-        if self._summary_writer:
-            # attention_weights.shape == (batch_size, num_heads, length, length)
-
-            # for i_head in range(self._mha.num_heads):
-            #     # select attention weights of one head and reshape for image output
-            #     img = attn_weights[:, i_head, :, :]
-            #     img = tf.expand_dims(img, axis=-1)
-            #     # rescale values to range from 0.0 to 1.0
-            #     img = (img - tf.reduce_min(img)) / (
-            #         tf.reduce_max(img) - tf.reduce_min(img)
-            #     )
-            #     # write image to tensorboard
-            #     with self._summary_writer.as_default():
-            #         tf.summary.image(
-            #             f"attn_weights_h{i_head}", img, step=0, max_outputs=20
-            #         )
-
-            # reshape so we get all attention heads in one image
-            num_heads = self._mha.num_heads
-            length = tf.shape(x)[1]
-            img = tf.reshape(attn_weights, (-1, num_heads * length, length, 1))
-            img = tf.transpose(img, perm=[0, 2, 1, 3])
-            # rescale values to range from 0.0 to 1.0
-            img = (img - tf.reduce_min(img)) / (
-                tf.reduce_max(img) - tf.reduce_min(img)
-            )
-            # write image to tensorboard
-            with self._summary_writer.as_default():
-                tf.summary.image(
-                    f"attn_weights", img, step=0, max_outputs=20
-                )
-
-        return x  # (batch_size, length, units)
+        return x, attn_weights  # (batch_size, length, units), (batch_size, num_heads, length, length)
 
 
 class TransformerEncoder(tf.keras.layers.Layer):
@@ -563,7 +527,6 @@ def __init__(
         max_relative_position: Optional[int] = None,
         heads_share_relative_embedding: bool = False,
         name: Optional[Text] = None,
-        summary_writer: Optional[ResourceSummaryWriter] = None,
     ) -> None:
         super().__init__(name=name)
 
@@ -594,7 +557,6 @@ def __init__(
                 use_value_relative_position,
                 max_relative_position,
                 heads_share_relative_embedding,
-                summary_writer,
             )
             for _ in range(num_layers)
         ]
@@ -632,7 +594,7 @@ def call(
         x: tf.Tensor,
         pad_mask: Optional[tf.Tensor] = None,
         training: Optional[Union[tf.Tensor, bool]] = None,
-    ) -> tf.Tensor:
+    ) -> Tuple[tf.Tensor, tf.Tensor]:
         """Apply transformer encoder.
 
         Arguments:
@@ -661,10 +623,15 @@ def call(
                     1.0, pad_mask + self._look_ahead_pad_mask(tf.shape(pad_mask)[-1])
                 )  # (batch_size, 1, length, length)
 
+        layer_attention_weights = []
+
         for layer in self._enc_layers:
-            x = layer(x, pad_mask=pad_mask, training=training)
+            x, attn_weights = layer(x, pad_mask=pad_mask, training=training)
+            layer_attention_weights.append(attn_weights)
 
         # if normalization is done in encoding layers, then it should also be done
         # on the output, since the output can grow very large, being the sum of
         # a whole stack of unnormalized layer outputs.
-        return self._layer_norm(x)  # (batch_size, length, units)
+        x = self._layer_norm(x)  # (batch_size, length, units)
+
+        return x, tf.stack(layer_attention_weights)  # (batch_size, length, units), (num_layers, batch_size, num_heads, length, length)

From f773a168859af3dfe179212d3c8e081d77fb2cb0 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Mon, 12 Oct 2020 17:37:46 +0200
Subject: [PATCH 003/102] Implement png output for DIET

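---
Notes (review):
    * rasa/utils/plotting.py: `plot_attention_weights` indexes with
      `attention_weights[layer_index - 1][0][head_index - 1]` while both
      loops start at 0, so the subplot titled "0/0" shows the last layer
      and last head (index -1) and every panel is shifted by one. The
      index should be `attention_weights[layer_index][0][head_index]`.
    * rasa/utils/plotting.py: a new figure is created on every call and
      never closed, so figures accumulate when this runs once per message.
    * rasa/utils/plotting.py: the shape checks use `assert`, which is
      stripped under `python -O`; raise an explicit error instead.
    * rasa/nlu/classifiers/diet_classifier.py: the output file name is
      hard-coded (already marked ToDo) and is overwritten on each call.
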
 rasa/core/policies/ted_policy.py        | 35 -------------------------
 rasa/nlu/classifiers/diet_classifier.py | 17 +++++++++---
 rasa/utils/plotting.py                  | 35 +++++++++++++++++++++++++
 3 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index a9caab216590..bf23520ce66a 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -840,42 +840,7 @@ def batch_predict(
             sim_all, self.config[SIMILARITY_TYPE]
         )
 
-        if self.tensorboard_log_dir and len(batch_in) == 1:
-            # Log attention weights if we make a single prediction
-            print("xxxxxxxxxxxxxxxxxxxxxxxx")
-            if not self.test_summary_writer:
-                self._set_up_tensorboard_writer()
-            if self.test_summary_writer:
-                with self.test_summary_writer.as_default():
-                    tf.summary.image(
-                        "TED_attention_weights",
-                        image_from_attention_weights(attention_weights),
-                        step=0,
-                        max_outputs=9
-                    )
 
         return {"action_scores": scores}
 
-
-def image_from_attention_weights(attention_weights: tf.Tensor) -> tf.Tensor:
-    num_layers = tf.shape(attention_weights)[0]
-    batch_size = tf.shape(attention_weights)[1]
-    num_heads = tf.shape(attention_weights)[2]
-    length = tf.shape(attention_weights)[3]
-
-    img = attention_weights
-    img = tf.transpose(img, perm=[1, 0, 2, 3, 4])
-    img = tf.reshape(img, (batch_size * num_layers, length, num_heads * length, 1))
-    # img = tf.transpose(img, perm=[0, 2, 1, 3])
-    # rescale values to range from 0.0 to 1.0
-    img = normalize_image(img)
-    return img
-
-
-def normalize_image(image: tf.Tensor) -> tf.Tensor:
-    return (image - tf.reduce_min(image)) / (
-        tf.reduce_max(image) - tf.reduce_min(image)
-    )
-
-
 # pytype: enable=key-error
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 870994355dba..88b349246b6a 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -21,6 +21,7 @@
 from rasa.nlu.test import determine_token_labels
 from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
 from rasa.utils import train_utils
+from rasa.utils.plotting import plot_attention_weights
 from rasa.utils.tensorflow import layers
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
@@ -898,6 +899,13 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
             message.set(ENTITIES, entities, add_to_output=True)
 
+        if "diagnostic_data" in out and "attention_weights" in out["diagnostic_data"]:
+            plot_attention_weights(
+                out["diagnostic_data"]["attention_weights"],
+                output_file="JOHANNES.png"  # ToDo: Fix
+            )
+
+
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
 
@@ -1447,7 +1455,7 @@ def _create_sequence(
         dense_dropout: bool = False,
         masked_lm_loss: bool = False,
         sequence_ids: bool = False,
-    ) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor]]:
+    ) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor], Optional[tf.Tensor]]:
         if sequence_ids:
             seq_ids = self._features_as_seq_ids(sequence_features, f"{name}_{SEQUENCE}")
         else:
@@ -1480,7 +1488,7 @@ def _create_sequence(
             # apply activation
             outputs = tfa.activations.gelu(outputs)
 
-        return outputs, inputs, seq_ids, lm_mask_bool
+        return outputs, inputs, seq_ids, lm_mask_bool, attention_weights
 
     def _create_all_labels(self) -> Tuple[tf.Tensor, tf.Tensor]:
         all_label_ids = self.tf_label_data[LABEL_KEY][LABEL_SUB_KEY][0]
@@ -1610,6 +1618,7 @@ def batch_loss(
             text_in,
             text_seq_ids,
             lm_mask_bool_text,
+            _,
         ) = self._create_sequence(
             tf_batch_data[TEXT][SEQUENCE],
             tf_batch_data[TEXT][SENTENCE],
@@ -1746,7 +1755,7 @@ def batch_predict(
 
         mask = self._compute_mask(sequence_lengths)
 
-        text_transformed, _, _, _ = self._create_sequence(
+        text_transformed, _, _, _, attention_weights = self._create_sequence(
             tf_batch_data[TEXT][SEQUENCE],
             tf_batch_data[TEXT][SENTENCE],
             mask_sequence_text,
@@ -1756,6 +1765,8 @@ def batch_predict(
 
         predictions: Dict[Text, tf.Tensor] = {}
 
+        predictions["diagnostic_data"] = {"attention_weights": attention_weights}
+
         if self.config[INTENT_CLASSIFICATION]:
             predictions.update(
                 self._batch_predict_intents(sequence_lengths, text_transformed)
diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py
index fdd7dfe50abe..d64bcad59b87 100644
--- a/rasa/utils/plotting.py
+++ b/rasa/utils/plotting.py
@@ -197,3 +197,38 @@ def plot_curve(
     plt.savefig(graph_path, format="pdf")
 
     logger.info(f"Comparison graph saved to '{graph_path}'.")
+
+
+def plot_attention_weights(
+    attention_weights: np.ndarray,
+    labels: Optional[List[Text]] = None,
+    output_file: Optional[Text] = None,
+) -> None:
+    # shape=(num_transformer_layers, 1, num_heads, length, length)
+    assert len(attention_weights.shape) == 5
+    assert attention_weights.shape[0] > 0
+    assert attention_weights.shape[2] > 0
+    assert attention_weights.shape[3] > 0
+    assert attention_weights.shape[4] == attention_weights.shape[3]
+
+    num_layers, _, num_heads, length, _ = attention_weights.shape
+
+    import matplotlib.pyplot as plt
+    
+    plt.figure(figsize=(num_layers, num_heads))
+    for layer_index in range(num_layers):
+        for head_index in range(num_heads):
+            plt.subplot(
+                num_layers, 
+                num_heads, 
+                layer_index * num_heads + head_index + 1,
+                title=f"{layer_index}/{head_index}"
+            )
+            plt.imshow(
+                attention_weights[layer_index - 1][0][head_index - 1]
+            )
+    
+    if output_file:
+        fig = plt.gcf()
+        fig.set_size_inches(10, 10)
+        fig.savefig(output_file, bbox_inches="tight")

From 1add96397545cfb7ac4fefef431652f545f9125f Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Fri, 16 Oct 2020 11:04:18 +0200
Subject: [PATCH 004/102] Add --diagnostics option

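---
Notes (review):
    * rasa/nlu/run.py: `show_diagnostics` is inserted before
      `component_builder` in `run_cmdline`, so any caller that passes
      `component_builder` positionally as the second argument now binds
      it to `show_diagnostics`. Appending the new parameter after
      `component_builder` keeps the signature backward-compatible.
    * rasa/cli/shell.py: the help text "s.a. attention weights" is
      unclear; presumably "such as attention weights" is meant.
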
 rasa/cli/shell.py                       |  8 +++++++-
 rasa/nlu/classifiers/diet_classifier.py | 11 +++++++----
 rasa/nlu/model.py                       |  3 ++-
 rasa/nlu/run.py                         |  6 ++++--
 rasa/shared/nlu/constants.py            |  3 +++
 rasa/utils/plotting.py                  |  3 ++-
 6 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/rasa/cli/shell.py b/rasa/cli/shell.py
index 597de68d7602..dc578a3304e4 100644
--- a/rasa/cli/shell.py
+++ b/rasa/cli/shell.py
@@ -53,6 +53,12 @@ def add_subparser(
 
     shell_nlu_subparser.set_defaults(func=shell_nlu)
 
+    shell_nlu_subparser.add_argument(
+        "--diagnostics",
+        action="store_true",
+        help="Output diagnostic data (s.a. attention weights)"
+    )
+
     arguments.set_shell_arguments(shell_parser)
     arguments.set_shell_nlu_arguments(shell_nlu_subparser)
 
@@ -86,7 +92,7 @@ def shell_nlu(args: argparse.Namespace):
         return
 
     telemetry.track_shell_started("nlu")
-    rasa.nlu.run.run_cmdline(nlu_model)
+    rasa.nlu.run.run_cmdline(nlu_model, show_diagnostics=args.diagnostics)
 
 
 def shell(args: argparse.Namespace):
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 88b349246b6a..c9c80cd3e77f 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -35,6 +35,8 @@
     ENTITY_ATTRIBUTE_GROUP,
     ENTITY_ATTRIBUTE_ROLE,
     NO_ENTITY_TAG,
+    DIAGNOSTIC_DATA,
+    ATTENTION_WEIGHTS,
 )
 from rasa.nlu.config import RasaNLUModelConfig, InvalidConfigError
 from rasa.shared.nlu.training_data.training_data import TrainingData
@@ -883,7 +885,7 @@ def _entity_label_to_tags(
 
         return predicted_tags, confidence_values
 
-    def process(self, message: Message, **kwargs: Any) -> None:
+    def process(self, message: Message, show_diagnostics: bool = False, **kwargs: Any) -> None:
         """Return the most likely label and its similarity to the input."""
 
         out = self._predict(message)
@@ -899,9 +901,10 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
             message.set(ENTITIES, entities, add_to_output=True)
 
-        if "diagnostic_data" in out and "attention_weights" in out["diagnostic_data"]:
+        if show_diagnostics and DIAGNOSTIC_DATA in out and ATTENTION_WEIGHTS in out[DIAGNOSTIC_DATA]:
             plot_attention_weights(
-                out["diagnostic_data"]["attention_weights"],
+                out[DIAGNOSTIC_DATA][ATTENTION_WEIGHTS],
+                title="DIETClassifier",
                 output_file="JOHANNES.png"  # ToDo: Fix
             )
 
@@ -1765,7 +1768,7 @@ def batch_predict(
 
         predictions: Dict[Text, tf.Tensor] = {}
 
-        predictions["diagnostic_data"] = {"attention_weights": attention_weights}
+        predictions[DIAGNOSTIC_DATA] = {ATTENTION_WEIGHTS: attention_weights}
 
         if self.config[INTENT_CLASSIFICATION]:
             predictions.update(
diff --git a/rasa/nlu/model.py b/rasa/nlu/model.py
index 2314f12b1186..14f95794fa53 100644
--- a/rasa/nlu/model.py
+++ b/rasa/nlu/model.py
@@ -375,6 +375,7 @@ def parse(
         text: Text,
         time: Optional[datetime.datetime] = None,
         only_output_properties: bool = True,
+        show_diagnostics: bool = False,
     ) -> Dict[Text, Any]:
         """Parse the input text, classify it and return pipeline result.
 
@@ -395,7 +396,7 @@ def parse(
         message = Message(data=data, time=time)
 
         for component in self.pipeline:
-            component.process(message, **self.context)
+            component.process(message, show_diagnostics=show_diagnostics, **self.context)
 
         output = self.default_output_attributes()
         output.update(message.as_dict(only_output_properties=only_output_properties))
diff --git a/rasa/nlu/run.py b/rasa/nlu/run.py
index 4471ea706e30..463cce09e025 100644
--- a/rasa/nlu/run.py
+++ b/rasa/nlu/run.py
@@ -16,7 +16,9 @@
 
 
 def run_cmdline(
-    model_path: Text, component_builder: Optional["ComponentBuilder"] = None
+    model_path: Text, 
+    show_diagnostics: bool = False,
+    component_builder: Optional["ComponentBuilder"] = None
 ) -> None:
     interpreter = Interpreter.load(model_path, component_builder)
     regex_interpreter = RegexInterpreter()
@@ -33,6 +35,6 @@ def run_cmdline(
         if message.startswith(INTENT_MESSAGE_PREFIX):
             result = rasa.utils.common.run_in_loop(regex_interpreter.parse(message))
         else:
-            result = interpreter.parse(message)
+            result = interpreter.parse(message, show_diagnostics=show_diagnostics)
 
         print(json_to_string(result))
diff --git a/rasa/shared/nlu/constants.py b/rasa/shared/nlu/constants.py
index dde63d4c32f1..9c19589550c7 100644
--- a/rasa/shared/nlu/constants.py
+++ b/rasa/shared/nlu/constants.py
@@ -34,3 +34,6 @@
 ENTITY_ATTRIBUTE_START = "start"
 ENTITY_ATTRIBUTE_END = "end"
 NO_ENTITY_TAG = "O"
+
+DIAGNOSTIC_DATA = "diagnostic_data"
+ATTENTION_WEIGHTS = "attention_weights"
diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py
index d64bcad59b87..172c8ef097cb 100644
--- a/rasa/utils/plotting.py
+++ b/rasa/utils/plotting.py
@@ -201,7 +201,7 @@ def plot_curve(
 
 def plot_attention_weights(
     attention_weights: np.ndarray,
-    labels: Optional[List[Text]] = None,
+    title: Optional[Text] = None,
     output_file: Optional[Text] = None,
 ) -> None:
     # shape=(num_transformer_layers, 1, num_heads, length, length)
@@ -227,6 +227,7 @@ def plot_attention_weights(
             plt.imshow(
                 attention_weights[layer_index - 1][0][head_index - 1]
             )
+    plt.suptitle(title)
     
     if output_file:
         fig = plt.gcf()

From ba30d9b9aa0b2cf7d9af46f876dd25e0e132d392 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Fri, 16 Oct 2020 15:46:55 +0200
Subject: [PATCH 005/102] Move constants

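---
Notes (review):
    * rasa/cli/shell.py: `shell()` reads `args.show_diagnostics`, but the
      flag added in the previous patch is `--diagnostics` (argparse
      destination `diagnostics`) and, as far as this series shows, it is
      registered only on the `nlu` sub-parser. Unless the attribute is
      defined elsewhere, this raises AttributeError; `shell_nlu()` uses
      `args.diagnostics`.
    * rasa/shared/constants.py: the file now ends without a trailing
      newline.
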
 rasa/cli/shell.py                       |  2 +-
 rasa/core/policies/ted_policy.py        | 12 +++++++++++-
 rasa/nlu/classifiers/diet_classifier.py |  3 +--
 rasa/shared/constants.py                |  3 +++
 rasa/shared/nlu/constants.py            |  3 ---
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/rasa/cli/shell.py b/rasa/cli/shell.py
index dc578a3304e4..db1ab90867f6 100644
--- a/rasa/cli/shell.py
+++ b/rasa/cli/shell.py
@@ -120,7 +120,7 @@ def shell(args: argparse.Namespace):
 
         telemetry.track_shell_started("nlu")
 
-        rasa.nlu.run.run_cmdline(nlu_model)
+        rasa.nlu.run.run_cmdline(nlu_model, show_diagnostics=args.show_diagnostics)
     else:
         import rasa.cli.run
 
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index bf23520ce66a..722a64871290 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -22,10 +22,12 @@
 from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter
 from rasa.core.policies.policy import Policy
 from rasa.core.constants import DEFAULT_POLICY_PRIORITY, DIALOGUE
+from rasa.shared.constants import DIAGNOSTIC_DATA, ATTENTION_WEIGHTS
 from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.core.generator import TrackerWithCachedStates
 from rasa.utils import train_utils
+from rasa.utils.plotting import plot_attention_weights
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
 from rasa.utils.tensorflow.model_data_utils import convert_to_data_format
@@ -371,6 +373,7 @@ def predict_action_probabilities(
         tracker: DialogueStateTracker,
         domain: Domain,
         interpreter: NaturalLanguageInterpreter,
+        show_diagnostics: bool = False,
         **kwargs: Any,
     ) -> List[float]:
         """Predict the next action the bot should take.
@@ -395,6 +398,13 @@ def predict_action_probabilities(
         if self.config[LOSS_TYPE] == SOFTMAX and self.config[RANKING_LENGTH] > 0:
             confidence = train_utils.normalize(confidence, self.config[RANKING_LENGTH])
 
+        if show_diagnostics and DIAGNOSTIC_DATA in output and ATTENTION_WEIGHTS in output[DIAGNOSTIC_DATA]:
+            plot_attention_weights(
+                output[DIAGNOSTIC_DATA][ATTENTION_WEIGHTS],
+                title="TEDPolicy",
+                output_file="JOHANNES-TED.png"  # ToDo: Fix
+            )
+
         return confidence.tolist()
 
     def persist(self, path: Union[Text, Path]) -> None:
@@ -841,6 +851,6 @@ def batch_predict(
         )
 
 
-        return {"action_scores": scores}
+        return {"action_scores": scores, DIAGNOSTIC_DATA: {ATTENTION_WEIGHTS: attention_weights}}
 
 # pytype: enable=key-error
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index c9c80cd3e77f..890e1903da7d 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -35,9 +35,8 @@
     ENTITY_ATTRIBUTE_GROUP,
     ENTITY_ATTRIBUTE_ROLE,
     NO_ENTITY_TAG,
-    DIAGNOSTIC_DATA,
-    ATTENTION_WEIGHTS,
 )
+from rasa.shared.constants import DIAGNOSTIC_DATA, ATTENTION_WEIGHTS
 from rasa.nlu.config import RasaNLUModelConfig, InvalidConfigError
 from rasa.shared.nlu.training_data.training_data import TrainingData
 from rasa.shared.nlu.training_data.message import Message
diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py
index 91f2b14e7940..58dbed40fc6c 100644
--- a/rasa/shared/constants.py
+++ b/rasa/shared/constants.py
@@ -72,3 +72,6 @@
 DEFAULT_NLU_RESULTS_PATH = "nlu_comparison_results"
 DEFAULT_CORE_SUBDIRECTORY_NAME = "core"
 DEFAULT_NLU_SUBDIRECTORY_NAME = "nlu"
+
+DIAGNOSTIC_DATA = "diagnostic_data"
+ATTENTION_WEIGHTS = "attention_weights"
\ No newline at end of file
diff --git a/rasa/shared/nlu/constants.py b/rasa/shared/nlu/constants.py
index fe17a6c3802f..5bc16fa6c86f 100644
--- a/rasa/shared/nlu/constants.py
+++ b/rasa/shared/nlu/constants.py
@@ -33,6 +33,3 @@
 ENTITY_ATTRIBUTE_START = "start"
 ENTITY_ATTRIBUTE_END = "end"
 NO_ENTITY_TAG = "O"
-
-DIAGNOSTIC_DATA = "diagnostic_data"
-ATTENTION_WEIGHTS = "attention_weights"

From 12bfc4c87f2730a429f336f90c494d5489f352ff Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Fri, 16 Oct 2020 16:34:41 +0200
Subject: [PATCH 006/102] Use tensorboard config instead of flag

---
 rasa/cli/shell.py                       | 4 ++--
 rasa/core/policies/ted_policy.py        | 3 +--
 rasa/nlu/classifiers/diet_classifier.py | 4 ++--
 rasa/nlu/model.py                       | 3 +--
 rasa/nlu/run.py                         | 3 +--
 rasa/utils/plotting.py                  | 3 +++
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/rasa/cli/shell.py b/rasa/cli/shell.py
index db1ab90867f6..bca78e25ed16 100644
--- a/rasa/cli/shell.py
+++ b/rasa/cli/shell.py
@@ -92,7 +92,7 @@ def shell_nlu(args: argparse.Namespace):
         return
 
     telemetry.track_shell_started("nlu")
-    rasa.nlu.run.run_cmdline(nlu_model, show_diagnostics=args.diagnostics)
+    rasa.nlu.run.run_cmdline(nlu_model)
 
 
 def shell(args: argparse.Namespace):
@@ -120,7 +120,7 @@ def shell(args: argparse.Namespace):
 
         telemetry.track_shell_started("nlu")
 
-        rasa.nlu.run.run_cmdline(nlu_model, show_diagnostics=args.show_diagnostics)
+        rasa.nlu.run.run_cmdline(nlu_model)
     else:
         import rasa.cli.run
 
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 722a64871290..b5a490755f48 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -373,7 +373,6 @@ def predict_action_probabilities(
         tracker: DialogueStateTracker,
         domain: Domain,
         interpreter: NaturalLanguageInterpreter,
-        show_diagnostics: bool = False,
         **kwargs: Any,
     ) -> List[float]:
         """Predict the next action the bot should take.
@@ -398,7 +397,7 @@ def predict_action_probabilities(
         if self.config[LOSS_TYPE] == SOFTMAX and self.config[RANKING_LENGTH] > 0:
             confidence = train_utils.normalize(confidence, self.config[RANKING_LENGTH])
 
-        if show_diagnostics and DIAGNOSTIC_DATA in output and ATTENTION_WEIGHTS in output[DIAGNOSTIC_DATA]:
+        if self.config[TENSORBOARD_LOG_DIR] and DIAGNOSTIC_DATA in output and ATTENTION_WEIGHTS in output[DIAGNOSTIC_DATA]:
             plot_attention_weights(
                 output[DIAGNOSTIC_DATA][ATTENTION_WEIGHTS],
                 title="TEDPolicy",
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 890e1903da7d..819db63dad1b 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -884,7 +884,7 @@ def _entity_label_to_tags(
 
         return predicted_tags, confidence_values
 
-    def process(self, message: Message, show_diagnostics: bool = False, **kwargs: Any) -> None:
+    def process(self, message: Message, **kwargs: Any) -> None:
         """Return the most likely label and its similarity to the input."""
 
         out = self._predict(message)
@@ -900,7 +900,7 @@ def process(self, message: Message, show_diagnostics: bool = False, **kwargs: An
 
             message.set(ENTITIES, entities, add_to_output=True)
 
-        if show_diagnostics and DIAGNOSTIC_DATA in out and ATTENTION_WEIGHTS in out[DIAGNOSTIC_DATA]:
+        if self.component_config[TENSORBOARD_LOG_DIR] and DIAGNOSTIC_DATA in out and ATTENTION_WEIGHTS in out[DIAGNOSTIC_DATA]:
             plot_attention_weights(
                 out[DIAGNOSTIC_DATA][ATTENTION_WEIGHTS],
                 title="DIETClassifier",
diff --git a/rasa/nlu/model.py b/rasa/nlu/model.py
index 14f95794fa53..2314f12b1186 100644
--- a/rasa/nlu/model.py
+++ b/rasa/nlu/model.py
@@ -375,7 +375,6 @@ def parse(
         text: Text,
         time: Optional[datetime.datetime] = None,
         only_output_properties: bool = True,
-        show_diagnostics: bool = False,
     ) -> Dict[Text, Any]:
         """Parse the input text, classify it and return pipeline result.
 
@@ -396,7 +395,7 @@ def parse(
         message = Message(data=data, time=time)
 
         for component in self.pipeline:
-            component.process(message, show_diagnostics=show_diagnostics, **self.context)
+            component.process(message, **self.context)
 
         output = self.default_output_attributes()
         output.update(message.as_dict(only_output_properties=only_output_properties))
diff --git a/rasa/nlu/run.py b/rasa/nlu/run.py
index 463cce09e025..d6a387377cbe 100644
--- a/rasa/nlu/run.py
+++ b/rasa/nlu/run.py
@@ -17,7 +17,6 @@
 
 def run_cmdline(
     model_path: Text, 
-    show_diagnostics: bool = False,
     component_builder: Optional["ComponentBuilder"] = None
 ) -> None:
     interpreter = Interpreter.load(model_path, component_builder)
@@ -35,6 +34,6 @@ def run_cmdline(
         if message.startswith(INTENT_MESSAGE_PREFIX):
             result = rasa.utils.common.run_in_loop(regex_interpreter.parse(message))
         else:
-            result = interpreter.parse(message, show_diagnostics=show_diagnostics)
+            result = interpreter.parse(message)
 
         print(json_to_string(result))
diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py
index 172c8ef097cb..4f471b23a5af 100644
--- a/rasa/utils/plotting.py
+++ b/rasa/utils/plotting.py
@@ -233,3 +233,6 @@ def plot_attention_weights(
         fig = plt.gcf()
         fig.set_size_inches(10, 10)
         fig.savefig(output_file, bbox_inches="tight")
+        # Delete the figure from memory since it's saved now
+        plt.clf()
+        plt.close()

From 181685733bd58e262dcccea78738b56dde4bd9d0 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Fri, 16 Oct 2020 17:50:31 +0200
Subject: [PATCH 007/102] Remove arg again

---
 rasa/cli/shell.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/rasa/cli/shell.py b/rasa/cli/shell.py
index bca78e25ed16..597de68d7602 100644
--- a/rasa/cli/shell.py
+++ b/rasa/cli/shell.py
@@ -53,12 +53,6 @@ def add_subparser(
 
     shell_nlu_subparser.set_defaults(func=shell_nlu)
 
-    shell_nlu_subparser.add_argument(
-        "--diagnostics",
-        action="store_true",
-        help="Output diagnostic data (s.a. attention weights)"
-    )
-
     arguments.set_shell_arguments(shell_parser)
     arguments.set_shell_nlu_arguments(shell_nlu_subparser)
 

From 8d4667c5a9193da89c6f1928bc599cef88c304b0 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 21 Oct 2020 11:13:43 +0200
Subject: [PATCH 008/102] Add _with_diagnostics methods

---
 rasa/core/policies/policy.py            | 22 ++++++++++++++++++++++
 rasa/core/policies/ted_policy.py        | 25 +++++++++++++++++--------
 rasa/nlu/classifiers/diet_classifier.py | 15 ++++++++-------
 rasa/nlu/components.py                  | 17 +++++++++++++++++
 4 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py
index 12a3263d28d0..b71a89478871 100644
--- a/rasa/core/policies/policy.py
+++ b/rasa/core/policies/policy.py
@@ -215,6 +215,28 @@ def predict_action_probabilities(
 
         raise NotImplementedError("Policy must have the capacity to predict.")
 
+    def predict_action_probabilities_with_diagnostics(
+        self,
+        tracker: DialogueStateTracker,
+        domain: Domain,
+        interpreter: NaturalLanguageInterpreter,
+        **kwargs: Any,
+    ) -> Tuple[List[float], Optional[Dict[Text, Any]]]:
+        """Predicts the next action and diagnostic data for debugging.
+
+        Args:
+            tracker: the :class:`rasa.core.trackers.DialogueStateTracker`
+            domain: the :class:`rasa.shared.core.domain.Domain`
+            interpreter: Interpreter which may be used by the policies to create
+                additional features.
+
+        Returns:
+             The list of probabilities for the next actions and a dictionary of
+             diagnostic data (or None).
+        """
+
+        return self.predict_action_probabilities(tracker, domain, interpreter, **kwargs), None
+
     def _metadata(self) -> Optional[Dict[Text, Any]]:
         """Returns this policy's attributes that should be persisted.
 
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 13f961cfb5c2..2cdfa18f74ef 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -379,6 +379,22 @@ def predict_action_probabilities(
         Return the list of probabilities for the next actions.
         """
 
+        action_probabilities, _ = self.predict_action_probabilities_with_diagnostics(tracker, domain, interpreter, **kwargs)
+        return action_probabilities
+
+    def predict_action_probabilities_with_diagnostics(
+        self,
+        tracker: DialogueStateTracker,
+        domain: Domain,
+        interpreter: NaturalLanguageInterpreter,
+        **kwargs: Any,
+    ) -> Tuple[List[float], Optional[Dict[Text, Any]]]:
+        """Predict the next action the bot should take.
+        
+        Return the list of probabilities for the next actions and
+        diagnostic data.
+        """
+
         if self.model is None:
             return self._default_predictions(domain)
 
@@ -397,14 +413,7 @@ def predict_action_probabilities(
         if self.config[LOSS_TYPE] == SOFTMAX and self.config[RANKING_LENGTH] > 0:
             confidence = train_utils.normalize(confidence, self.config[RANKING_LENGTH])
 
-        if self.config[TENSORBOARD_LOG_DIR] and DIAGNOSTIC_DATA in output and ATTENTION_WEIGHTS in output[DIAGNOSTIC_DATA]:
-            plot_attention_weights(
-                output[DIAGNOSTIC_DATA][ATTENTION_WEIGHTS],
-                title="TEDPolicy",
-                output_file="JOHANNES-TED.png"  # ToDo: Fix
-            )
-
-        return confidence.tolist()
+        return confidence.tolist(), output.get(DIAGNOSTIC_DATA)
 
     def persist(self, path: Union[Text, Path]) -> None:
         """Persists the policy to a storage."""
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 2c54be302f22..5df0e2f465ea 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -885,7 +885,13 @@ def _entity_label_to_tags(
         return predicted_tags, confidence_values
 
     def process(self, message: Message, **kwargs: Any) -> None:
-        """Return the most likely label and its similarity to the input."""
+        """Augment the message with intents and entities."""
+
+        self.process_with_diagnostics(message, **kwargs)
+
+    
+    def process_with_diagnostics(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
+        """Augment the message with intents and entities and return diagnostic data."""
 
         out = self._predict(message)
 
@@ -900,12 +906,7 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
             message.set(ENTITIES, entities, add_to_output=True)
 
-        if self.component_config[TENSORBOARD_LOG_DIR] and DIAGNOSTIC_DATA in out and ATTENTION_WEIGHTS in out[DIAGNOSTIC_DATA]:
-            plot_attention_weights(
-                out[DIAGNOSTIC_DATA][ATTENTION_WEIGHTS],
-                title="DIETClassifier",
-                output_file="JOHANNES.png"  # ToDo: Fix
-            )
+        return out.get(DIAGNOSTIC_DATA)
 
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 1f975fc6709c..66caedbfa534 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -550,6 +550,23 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
         pass
 
+    def process_with_diagnostics(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
+        """Process an incoming message and return diagnostic data.
+
+        This is the same as :meth:`rasa.nlu.components.Component.process`
+        but returns diagnostic data that may be used for debugging.
+
+        Args:
+            message: The :class:`rasa.shared.nlu.training_data.message.Message` to process.
+
+        Returns:
+            A dictionary of diagnostic data such as attention weights, or None.
+
+        """
+
+        self.process(message, **kwargs)
+        return None
+
     def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
         """Persist this component to disk for future loading.
 

From 0057f68cdec276ef7ec259f9e71bd125493cb7d1 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 21 Oct 2020 11:14:29 +0200
Subject: [PATCH 009/102] Remove `plot_attention_weights`

---
 rasa/utils/plotting.py | 39 ---------------------------------------
 1 file changed, 39 deletions(-)

diff --git a/rasa/utils/plotting.py b/rasa/utils/plotting.py
index 4f471b23a5af..fdd7dfe50abe 100644
--- a/rasa/utils/plotting.py
+++ b/rasa/utils/plotting.py
@@ -197,42 +197,3 @@ def plot_curve(
     plt.savefig(graph_path, format="pdf")
 
     logger.info(f"Comparison graph saved to '{graph_path}'.")
-
-
-def plot_attention_weights(
-    attention_weights: np.ndarray,
-    title: Optional[Text] = None,
-    output_file: Optional[Text] = None,
-) -> None:
-    # shape=(num_transformer_layers, 1, num_heads, length, length)
-    assert len(attention_weights.shape) == 5
-    assert attention_weights.shape[0] > 0
-    assert attention_weights.shape[2] > 0
-    assert attention_weights.shape[3] > 0
-    assert attention_weights.shape[4] == attention_weights.shape[3]
-
-    num_layers, _, num_heads, length, _ = attention_weights.shape
-
-    import matplotlib.pyplot as plt
-    
-    plt.figure(figsize=(num_layers, num_heads))
-    for layer_index in range(num_layers):
-        for head_index in range(num_heads):
-            plt.subplot(
-                num_layers, 
-                num_heads, 
-                layer_index * num_heads + head_index + 1,
-                title=f"{layer_index}/{head_index}"
-            )
-            plt.imshow(
-                attention_weights[layer_index - 1][0][head_index - 1]
-            )
-    plt.suptitle(title)
-    
-    if output_file:
-        fig = plt.gcf()
-        fig.set_size_inches(10, 10)
-        fig.savefig(output_file, bbox_inches="tight")
-        # Delete the figure from memory since it's saved now
-        plt.clf()
-        plt.close()

From e19451cc3c3dab3dfad3c324beae6152d8fe5316 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 21 Oct 2020 11:18:11 +0200
Subject: [PATCH 010/102] Fix return without model

---
 rasa/core/policies/ted_policy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 2cdfa18f74ef..086369390429 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -396,7 +396,7 @@ def predict_action_probabilities_with_diagnostics(
         """
 
         if self.model is None:
-            return self._default_predictions(domain)
+            return self._default_predictions(domain), None
 
         # create model data from tracker
         tracker_state_features = self.featurizer.create_state_features(

From ad1b44c70fd162bd3f4f6f10a49ad169c32d477e Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 21 Oct 2020 11:21:29 +0200
Subject: [PATCH 011/102] Remove comment

---
 rasa/core/policies/ted_policy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 086369390429..3f21b0bdc618 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -702,7 +702,7 @@ def _emebed_dialogue(
 
         dialogue_embed = self._tf_layers[f"embed.{DIALOGUE}"](dialogue_transformed)
 
-        return dialogue_embed, mask, attention_weights  # ToDo: ?, ?, (num_layers, batch_size, num_heads, length, length)
+        return dialogue_embed, mask, attention_weights
 
     def _encode_features_per_attribute(
         self, tf_batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]], attribute: Text

From f55aed26ea7b29ed3ec94ac7e93f0a012d56ce32 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 21 Oct 2020 11:26:08 +0200
Subject: [PATCH 012/102] Apply BLACK formatting

---
 rasa/core/policies/policy.py            |  5 ++++-
 rasa/core/policies/ted_policy.py        | 16 +++++++++++-----
 rasa/nlu/classifiers/diet_classifier.py | 14 ++++++++++----
 rasa/nlu/components.py                  |  4 +++-
 rasa/nlu/run.py                         |  3 +--
 rasa/shared/constants.py                |  2 +-
 rasa/utils/tensorflow/transformer.py    | 11 +++++++++--
 7 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py
index b71a89478871..c1a27255fdfb 100644
--- a/rasa/core/policies/policy.py
+++ b/rasa/core/policies/policy.py
@@ -235,7 +235,10 @@ def predict_action_probabilities_with_diagnostics(
              diagnostic data (or None).
         """
 
-        return self.predict_action_probabilities(tracker, domain, interpreter, **kwargs), None
+        return (
+            self.predict_action_probabilities(tracker, domain, interpreter, **kwargs),
+            None,
+        )
 
     def _metadata(self) -> Optional[Dict[Text, Any]]:
         """Returns this policy's attributes that should be persisted.
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 3f21b0bdc618..8d605bbe0960 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -379,7 +379,9 @@ def predict_action_probabilities(
         Return the list of probabilities for the next actions.
         """
 
-        action_probabilities, _ = self.predict_action_probabilities_with_diagnostics(tracker, domain, interpreter, **kwargs)
+        action_probabilities, _ = self.predict_action_probabilities_with_diagnostics(
+            tracker, domain, interpreter, **kwargs
+        )
         return action_probabilities
 
     def predict_action_probabilities_with_diagnostics(
@@ -688,9 +690,9 @@ def _emebed_dialogue(
 
         mask = self._compute_mask(sequence_lengths)
 
-        dialogue_transformed, attention_weights = self._tf_layers[f"transformer.{DIALOGUE}"](
-            dialogue_in, 1 - mask, self._training
-        )
+        dialogue_transformed, attention_weights = self._tf_layers[
+            f"transformer.{DIALOGUE}"
+        ](dialogue_in, 1 - mask, self._training)
         dialogue_transformed = tfa.activations.gelu(dialogue_transformed)
 
         if self.max_history_tracker_featurizer_used:
@@ -855,4 +857,8 @@ def batch_predict(
             sim_all, self.config[SIMILARITY_TYPE]
         )
 
-        return {"action_scores": scores, DIAGNOSTIC_DATA: {ATTENTION_WEIGHTS: attention_weights}}
+        return {
+            "action_scores": scores,
+            DIAGNOSTIC_DATA: {ATTENTION_WEIGHTS: attention_weights},
+        }
+
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 5df0e2f465ea..68ed6536fe3a 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -889,8 +889,9 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
         self.process_with_diagnostics(message, **kwargs)
 
-    
-    def process_with_diagnostics(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
+    def process_with_diagnostics(
+        self, message: Message, **kwargs: Any
+    ) -> Optional[Dict[Text, Any]]:
         """Augment the message with intents and entities and return diagnostic data."""
 
         out = self._predict(message)
@@ -908,7 +909,6 @@ def process_with_diagnostics(self, message: Message, **kwargs: Any) -> Optional[
 
         return out.get(DIAGNOSTIC_DATA)
 
-
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
 
@@ -1456,7 +1456,13 @@ def _create_sequence(
         dense_dropout: bool = False,
         masked_lm_loss: bool = False,
         sequence_ids: bool = False,
-    ) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor], Optional[tf.Tensor]]:
+    ) -> Tuple[
+        tf.Tensor,
+        tf.Tensor,
+        Optional[tf.Tensor],
+        Optional[tf.Tensor],
+        Optional[tf.Tensor],
+    ]:
         if sequence_ids:
             seq_ids = self._features_as_seq_ids(sequence_features, f"{name}_{SEQUENCE}")
         else:
diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 66caedbfa534..15006095d8cf 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -550,7 +550,9 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
         pass
 
-    def process_with_diagnostics(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
+    def process_with_diagnostics(
+        self, message: Message, **kwargs: Any
+    ) -> Optional[Dict[Text, Any]]:
         """Process an incoming message and return diagnostic data.
 
         This is the same as :meth:`rasa.nlu.components.Component.process`
diff --git a/rasa/nlu/run.py b/rasa/nlu/run.py
index d6a387377cbe..4471ea706e30 100644
--- a/rasa/nlu/run.py
+++ b/rasa/nlu/run.py
@@ -16,8 +16,7 @@
 
 
 def run_cmdline(
-    model_path: Text, 
-    component_builder: Optional["ComponentBuilder"] = None
+    model_path: Text, component_builder: Optional["ComponentBuilder"] = None
 ) -> None:
     interpreter = Interpreter.load(model_path, component_builder)
     regex_interpreter = RegexInterpreter()
diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py
index 58dbed40fc6c..1aa3b14583df 100644
--- a/rasa/shared/constants.py
+++ b/rasa/shared/constants.py
@@ -74,4 +74,4 @@
 DEFAULT_NLU_SUBDIRECTORY_NAME = "nlu"
 
 DIAGNOSTIC_DATA = "diagnostic_data"
-ATTENTION_WEIGHTS = "attention_weights"
\ No newline at end of file
+ATTENTION_WEIGHTS = "attention_weights"
diff --git a/rasa/utils/tensorflow/transformer.py b/rasa/utils/tensorflow/transformer.py
index 7d09a9423d4d..c4fafbffd1c8 100644
--- a/rasa/utils/tensorflow/transformer.py
+++ b/rasa/utils/tensorflow/transformer.py
@@ -480,7 +480,10 @@ def call(
             ffn_out = layer(ffn_out, training=training)
         x += ffn_out
 
-        return x, attn_weights  # (batch_size, length, units), (batch_size, num_heads, length, length)
+        return (
+            x,
+            attn_weights,
+        )  # (batch_size, length, units), (batch_size, num_heads, length, length)
 
 
 class TransformerEncoder(tf.keras.layers.Layer):
@@ -633,4 +636,8 @@ def call(
         # a whole stack of unnormalized layer outputs.
         x = self._layer_norm(x)  # (batch_size, length, units)
 
-        return x, tf.stack(layer_attention_weights)  # (batch_size, length, units), (num_layers, batch_size, num_heads, length, length)
+        return (
+            x,
+            tf.stack(layer_attention_weights),
+        )  # (batch_size, length, units), (num_layers, batch_size, num_heads, length, length)
+

From f6decb4d84639591102271884c152e8302bdd7b0 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 21 Oct 2020 12:00:37 +0200
Subject: [PATCH 013/102] Fix formatting

---
 rasa/core/policies/ted_policy.py     | 2 --
 rasa/utils/tensorflow/models.py      | 2 +-
 rasa/utils/tensorflow/transformer.py | 1 -
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 8d605bbe0960..d9406b75e8f1 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -392,7 +392,6 @@ def predict_action_probabilities_with_diagnostics(
         **kwargs: Any,
     ) -> Tuple[List[float], Optional[Dict[Text, Any]]]:
         """Predict the next action the bot should take.
-        
         Return the list of probabilities for the next actions and
         diagnostic data.
         """
@@ -861,4 +860,3 @@ def batch_predict(
             "action_scores": scores,
             DIAGNOSTIC_DATA: {ATTENTION_WEIGHTS: attention_weights},
         }
-
diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py
index 25f1f103f2bc..2f0270da7466 100644
--- a/rasa/utils/tensorflow/models.py
+++ b/rasa/utils/tensorflow/models.py
@@ -516,7 +516,7 @@ def batch_to_model_data_format(
 
         Batch contains any number of batch data. The order is equal to the
         key-value pairs in session data. As sparse data were converted into (indices,
-        data, shape) before, this method converts them into sparse tensors. Dense 
+        data, shape) before, this method converts them into sparse tensors. Dense
         data is kept.
         """
 
diff --git a/rasa/utils/tensorflow/transformer.py b/rasa/utils/tensorflow/transformer.py
index c4fafbffd1c8..7bd1f929546b 100644
--- a/rasa/utils/tensorflow/transformer.py
+++ b/rasa/utils/tensorflow/transformer.py
@@ -640,4 +640,3 @@ def call(
             x,
             tf.stack(layer_attention_weights),
         )  # (batch_size, length, units), (num_layers, batch_size, num_heads, length, length)
-

From fbd93fe18859a4276005296b912fc9fc4076d755 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 21 Oct 2020 12:05:01 +0200
Subject: [PATCH 014/102] Remove plot references

---
 rasa/core/policies/ted_policy.py        | 1 -
 rasa/nlu/classifiers/diet_classifier.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index d9406b75e8f1..a25407547715 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -27,7 +27,6 @@
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.core.generator import TrackerWithCachedStates
 from rasa.utils import train_utils
-from rasa.utils.plotting import plot_attention_weights
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
 from rasa.utils.tensorflow.model_data_utils import convert_to_data_format
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 68ed6536fe3a..7bbf2552b965 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -21,7 +21,6 @@
 from rasa.nlu.test import determine_token_labels
 from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
 from rasa.utils import train_utils
-from rasa.utils.plotting import plot_attention_weights
 from rasa.utils.tensorflow import layers
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature

From 88a1d34e034b4b5bf2ecf3f638aa8fbf191c3153 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Mon, 26 Oct 2020 16:58:51 +0100
Subject: [PATCH 015/102] Move constants out of shared

---
 rasa/constants.py                       | 2 ++
 rasa/core/policies/ted_policy.py        | 4 ++--
 rasa/nlu/classifiers/diet_classifier.py | 4 ++--
 rasa/shared/constants.py                | 3 ---
 4 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/rasa/constants.py b/rasa/constants.py
index e4f2697bbcf0..8bbd390bc577 100644
--- a/rasa/constants.py
+++ b/rasa/constants.py
@@ -39,3 +39,5 @@
 ENV_GPU_CONFIG = "TF_GPU_MEMORY_ALLOC"
 ENV_CPU_INTER_OP_CONFIG = "TF_INTER_OP_PARALLELISM_THREADS"
 ENV_CPU_INTRA_OP_CONFIG = "TF_INTRA_OP_PARALLELISM_THREADS"
+
+DIAGNOSTIC_DATA = "diagnostic_data"
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index a25407547715..fad673a09711 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -22,7 +22,7 @@
 from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter
 from rasa.core.policies.policy import Policy
 from rasa.core.constants import DEFAULT_POLICY_PRIORITY, DIALOGUE
-from rasa.shared.constants import DIAGNOSTIC_DATA, ATTENTION_WEIGHTS
+from rasa.constants import DIAGNOSTIC_DATA
 from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.core.generator import TrackerWithCachedStates
@@ -857,5 +857,5 @@ def batch_predict(
 
         return {
             "action_scores": scores,
-            DIAGNOSTIC_DATA: {ATTENTION_WEIGHTS: attention_weights},
+            DIAGNOSTIC_DATA: {"attention_weights": attention_weights},
         }
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 7bbf2552b965..7462e3835fcc 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -35,7 +35,7 @@
     ENTITY_ATTRIBUTE_ROLE,
     NO_ENTITY_TAG,
 )
-from rasa.shared.constants import DIAGNOSTIC_DATA, ATTENTION_WEIGHTS
+from rasa.constants import DIAGNOSTIC_DATA
 from rasa.nlu.config import RasaNLUModelConfig, InvalidConfigError
 from rasa.shared.nlu.training_data.training_data import TrainingData
 from rasa.shared.nlu.training_data.message import Message
@@ -1768,7 +1768,7 @@ def batch_predict(
 
         predictions: Dict[Text, tf.Tensor] = {}
 
-        predictions[DIAGNOSTIC_DATA] = {ATTENTION_WEIGHTS: attention_weights}
+        predictions[DIAGNOSTIC_DATA] = {"attention_weights": attention_weights}
 
         if self.config[INTENT_CLASSIFICATION]:
             predictions.update(
diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py
index 1aa3b14583df..91f2b14e7940 100644
--- a/rasa/shared/constants.py
+++ b/rasa/shared/constants.py
@@ -72,6 +72,3 @@
 DEFAULT_NLU_RESULTS_PATH = "nlu_comparison_results"
 DEFAULT_CORE_SUBDIRECTORY_NAME = "core"
 DEFAULT_NLU_SUBDIRECTORY_NAME = "nlu"
-
-DIAGNOSTIC_DATA = "diagnostic_data"
-ATTENTION_WEIGHTS = "attention_weights"

From 2501deacd8193e1830d5ffedf1523fc61a6d80b7 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 29 Oct 2020 12:09:02 +0100
Subject: [PATCH 016/102] Fix formatting

---
 rasa/utils/tensorflow/transformer.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/rasa/utils/tensorflow/transformer.py b/rasa/utils/tensorflow/transformer.py
index 7bd1f929546b..67ea7465cd6e 100644
--- a/rasa/utils/tensorflow/transformer.py
+++ b/rasa/utils/tensorflow/transformer.py
@@ -480,10 +480,8 @@ def call(
             ffn_out = layer(ffn_out, training=training)
         x += ffn_out
 
-        return (
-            x,
-            attn_weights,
-        )  # (batch_size, length, units), (batch_size, num_heads, length, length)
+        # (batch_size, length, units), (batch_size, num_heads, length, length)
+        return x, attn_weights
 
 
 class TransformerEncoder(tf.keras.layers.Layer):
@@ -636,7 +634,5 @@ def call(
         # a whole stack of unnormalized layer outputs.
         x = self._layer_norm(x)  # (batch_size, length, units)
 
-        return (
-            x,
-            tf.stack(layer_attention_weights),
-        )  # (batch_size, length, units), (num_layers, batch_size, num_heads, length, length)
+        # (batch_size, length, units), (num_layers, batch_size, num_heads, length, length)
+        return x, tf.stack(layer_attention_weights)

From 9e2beeacb6f651a410a12984a86d793f62b9f221 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 29 Oct 2020 18:43:41 +0100
Subject: [PATCH 017/102] Fix _prepare_transformer_layer

---
 rasa/utils/tensorflow/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py
index 2f0270da7466..2e2261dd670c 100644
--- a/rasa/utils/tensorflow/models.py
+++ b/rasa/utils/tensorflow/models.py
@@ -722,7 +722,7 @@ def _prepare_transformer_layer(
             )
         else:
             # create lambda so that it can be used later without the check
-            self._tf_layers[f"{prefix}.{name}"] = lambda x, mask, training: x
+            self._tf_layers[f"{prefix}.{name}"] = lambda x, mask, training: x, None
 
     def _prepare_dot_product_loss(
         self, name: Text, scale_loss: bool, prefix: Text = "loss"

From bf7f08a0ad7c8fc3c5dcd67376dc51de5be4c1e1 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 3 Nov 2020 12:52:39 +0100
Subject: [PATCH 018/102] Return diagnostics with `process`

---
 rasa/nlu/classifiers/diet_classifier.py |  7 +------
 rasa/nlu/components.py                  | 22 +++-------------------
 2 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index bf2903ae1123..5419ffd5624a 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -892,12 +892,7 @@ def _entity_label_to_tags(
 
         return predicted_tags, confidence_values
 
-    def process(self, message: Message, **kwargs: Any) -> None:
-        """Augment the message with intents and entities."""
-
-        self.process_with_diagnostics(message, **kwargs)
-
-    def process_with_diagnostics(
+    def process(
         self, message: Message, **kwargs: Any
     ) -> Optional[Dict[Text, Any]]:
         """Augment the message with intents and entities and return diagnostic data."""
diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index eea794c37cc5..e606a5c8ef17 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -544,8 +544,8 @@ def train(
 
         pass
 
-    def process(self, message: Message, **kwargs: Any) -> None:
-        """Process an incoming message.
+    def process(self, message: Message, **kwargs: Any) -> Dict[Text, Any]:
+        """Process an incoming message and return diagnostic data.
 
         This is the components chance to process an incoming
         message. The component can rely on
@@ -556,21 +556,6 @@ def process(self, message: Message, **kwargs: Any) -> None:
         :meth:`rasa.nlu.components.Component.process`
         of components previous to this one.
 
-        Args:
-            message: The :class:`rasa.shared.nlu.training_data.message.Message` to process.
-
-        """
-
-        pass
-
-    def process_with_diagnostics(
-        self, message: Message, **kwargs: Any
-    ) -> Optional[Dict[Text, Any]]:
-        """Process an incoming message and return diagnostic data.
-
-        This is the same as :meth:`rasa.nlu.components.Component.process`
-        but returns diagnostic data that may be used for debugging.
-
         Args:
             message: The :class:`rasa.shared.nlu.training_data.message.Message` to process.
 
@@ -579,8 +564,7 @@ def process_with_diagnostics(
 
         """
 
-        self.process(message, **kwargs)
-        return None
+        pass
 
     def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
         """Persist this component to disk for future loading.

From d30123ce51fae8bed62ca32ad0fd70be0a2eac2f Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 3 Nov 2020 13:15:40 +0100
Subject: [PATCH 019/102] Convert diagnostics to numpy

---
 rasa/core/policies/ted_policy.py        |  3 ++-
 rasa/nlu/classifiers/diet_classifier.py |  7 +++----
 rasa/utils/tensorflow/tf_to_numpy.py    | 16 ++++++++++++++++
 3 files changed, 21 insertions(+), 5 deletions(-)
 create mode 100644 rasa/utils/tensorflow/tf_to_numpy.py

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index fad673a09711..ab9f478ba131 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -30,6 +30,7 @@
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
 from rasa.utils.tensorflow.model_data_utils import convert_to_data_format
+from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
 from rasa.utils.tensorflow.constants import (
     LABEL,
     TRANSFORMER_SIZE,
@@ -413,7 +414,7 @@ def predict_action_probabilities_with_diagnostics(
         if self.config[LOSS_TYPE] == SOFTMAX and self.config[RANKING_LENGTH] > 0:
             confidence = train_utils.normalize(confidence, self.config[RANKING_LENGTH])
 
-        return confidence.tolist(), output.get(DIAGNOSTIC_DATA)
+        return confidence.tolist(), values_to_numpy(output.get(DIAGNOSTIC_DATA))
 
     def persist(self, path: Union[Text, Path]) -> None:
         """Persists the policy to a storage."""
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 5419ffd5624a..75b9a7571649 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -24,6 +24,7 @@
 from rasa.utils.tensorflow import layers
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
+from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
 from rasa.nlu.constants import TOKENS_NAMES
 from rasa.shared.nlu.constants import (
     TEXT,
@@ -892,9 +893,7 @@ def _entity_label_to_tags(
 
         return predicted_tags, confidence_values
 
-    def process(
-        self, message: Message, **kwargs: Any
-    ) -> Optional[Dict[Text, Any]]:
+    def process(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
         """Augment the message with intents and entities and return diagnostic data."""
 
         out = self._predict(message)
@@ -910,7 +909,7 @@ def process(
 
             message.set(ENTITIES, entities, add_to_output=True)
 
-        return out.get(DIAGNOSTIC_DATA)
+        return values_to_numpy(out.get(DIAGNOSTIC_DATA))
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
diff --git a/rasa/utils/tensorflow/tf_to_numpy.py b/rasa/utils/tensorflow/tf_to_numpy.py
new file mode 100644
index 000000000000..c09541af6414
--- /dev/null
+++ b/rasa/utils/tensorflow/tf_to_numpy.py
@@ -0,0 +1,16 @@
+from typing import Any, Dict, Optional
+from tensorflow import Tensor
+
+
+def values_to_numpy(data: Optional[Dict[Any, Any]]) -> Optional[Dict[Any, Any]]:
+    if not data:
+        return data
+
+    return {key: _to_numpy_if_tensor(value) for key, value in data.items()}
+
+
+def _to_numpy_if_tensor(value: Any) -> Any:
+    if isinstance(value, Tensor):
+        return value.numpy()
+    else:
+        return value

From 50447213b39cf26a9e3fd9b73da8325d00e017de Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 3 Nov 2020 14:53:47 +0100
Subject: [PATCH 020/102] Add test for DIETClassifier

---
 rasa/nlu/classifiers/diet_classifier.py       |  2 +-
 tests/nlu/classifiers/test_diet_classifier.py | 37 +++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 75b9a7571649..21b127b25761 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -909,7 +909,7 @@ def process(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
 
             message.set(ENTITIES, entities, add_to_output=True)
 
-        return values_to_numpy(out.get(DIAGNOSTIC_DATA))
+        return values_to_numpy(out.get(DIAGNOSTIC_DATA)) if out else None
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index dbf6e3ab11f0..f4c6cf5362a8 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -445,3 +445,40 @@ async def test_train_persist_load_with_composite_entities(
     assert loaded.pipeline
     text = "I am looking for an italian restaurant"
     assert loaded.parse(text) == trained.parse(text)
+
+
+async def test_process_gives_diagnostic_data(component_builder: ComponentBuilder, tmpdir: Path):
+    """Test if processing a message returns attention weights as numpy array"""
+
+    _config = RasaNLUModelConfig(
+        {
+            "pipeline": [
+                {"name": "WhitespaceTokenizer"},
+                {"name": "CountVectorsFeaturizer"},
+                {"name": "DIETClassifier", RANDOM_SEED: 1, EPOCHS: 1},
+            ],
+            "language": "en",
+        }
+    )
+
+    (trainer, trained, persisted_path) = await train(
+        _config,
+        path=tmpdir.strpath,
+        data="data/test/many_intents.md",
+        component_builder=component_builder,
+    )
+
+    assert trainer.pipeline
+    assert trained.pipeline
+
+    loaded = Interpreter.load(persisted_path, component_builder)
+
+    message = Message(data={TEXT: "hello"})
+    diagnostic_data = None
+    for component in loaded.pipeline:
+        diagnostic_data = component.process(message)
+
+    # The last component is DIETClassifier, which should return attention weights
+    assert isinstance(diagnostic_data, dict)
+    assert "attention_weights" in diagnostic_data
+    assert isinstance(diagnostic_data.get("attention_weights"), np.ndarray)

From 66120bc5b0d25167032e320c59ddb8e25246b916 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 3 Nov 2020 18:05:44 +0100
Subject: [PATCH 021/102] Add tests and fix TEDPolicy

---
 rasa/core/policies/ensemble.py                |  4 +-
 rasa/core/policies/policy.py                  | 29 +-----------
 rasa/core/policies/ted_policy.py              | 16 -------
 tests/core/policies/test_ted_policy.py        | 46 +++++++++++++++++++
 tests/core/test_policies.py                   |  4 +-
 tests/nlu/classifiers/test_diet_classifier.py |  4 +-
 6 files changed, 55 insertions(+), 48 deletions(-)
 create mode 100644 tests/core/policies/test_ted_policy.py

diff --git a/rasa/core/policies/ensemble.py b/rasa/core/policies/ensemble.py
index 423237a16d36..a6edec9065cf 100644
--- a/rasa/core/policies/ensemble.py
+++ b/rasa/core/policies/ensemble.py
@@ -597,7 +597,7 @@ def _get_prediction(
             len(arguments) > number_of_arguments_in_rasa_1_0
             and "interpreter" in arguments
         ):
-            probabilities = policy.predict_action_probabilities(
+            probabilities, _ = policy.predict_action_probabilities(
                 tracker, domain, interpreter
             )
         else:
@@ -608,7 +608,7 @@ def _get_prediction(
                 "adapt your custom `Policy` implementation.",
                 category=DeprecationWarning,
             )
-            probabilities = policy.predict_action_probabilities(
+            probabilities, _ = policy.predict_action_probabilities(
                 tracker, domain, RegexInterpreter()
             )
 
diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py
index af471f2900e8..ec8968be59c6 100644
--- a/rasa/core/policies/policy.py
+++ b/rasa/core/policies/policy.py
@@ -213,7 +213,7 @@ def predict_action_probabilities(
         domain: Domain,
         interpreter: NaturalLanguageInterpreter,
         **kwargs: Any,
-    ) -> List[float]:
+    ) -> Tuple[List[float], Optional[Dict[Text, Any]]]:
         """Predicts the next action the bot should take after seeing the tracker.
 
         Args:
@@ -223,36 +223,11 @@ def predict_action_probabilities(
                 additional features.
 
         Returns:
-             the list of probabilities for the next actions
+             The list of probabilities for the next actions and an optional dict for diagnostic data
         """
 
         raise NotImplementedError("Policy must have the capacity to predict.")
 
-    def predict_action_probabilities_with_diagnostics(
-        self,
-        tracker: DialogueStateTracker,
-        domain: Domain,
-        interpreter: NaturalLanguageInterpreter,
-        **kwargs: Any,
-    ) -> Tuple[List[float], Optional[Dict[Text, Any]]]:
-        """Predicts the next action and diagnostic data for debugging.
-
-        Args:
-            tracker: the :class:`rasa.core.trackers.DialogueStateTracker`
-            domain: the :class:`rasa.shared.core.domain.Domain`
-            interpreter: Interpreter which may be used by the policies to create
-                additional features.
-
-        Returns:
-             The list of probabilities for the next actions and a dictionary of
-             diagnostic data (or None).
-        """
-
-        return (
-            self.predict_action_probabilities(tracker, domain, interpreter, **kwargs),
-            None,
-        )
-
     def _metadata(self) -> Optional[Dict[Text, Any]]:
         """Returns this policy's attributes that should be persisted.
 
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index ab9f478ba131..46496f6a91af 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -374,22 +374,6 @@ def predict_action_probabilities(
         domain: Domain,
         interpreter: NaturalLanguageInterpreter,
         **kwargs: Any,
-    ) -> List[float]:
-        """Predict the next action the bot should take.
-        Return the list of probabilities for the next actions.
-        """
-
-        action_probabilities, _ = self.predict_action_probabilities_with_diagnostics(
-            tracker, domain, interpreter, **kwargs
-        )
-        return action_probabilities
-
-    def predict_action_probabilities_with_diagnostics(
-        self,
-        tracker: DialogueStateTracker,
-        domain: Domain,
-        interpreter: NaturalLanguageInterpreter,
-        **kwargs: Any,
     ) -> Tuple[List[float], Optional[Dict[Text, Any]]]:
         """Predict the next action the bot should take.
         Return the list of probabilities for the next actions and
diff --git a/tests/core/policies/test_ted_policy.py b/tests/core/policies/test_ted_policy.py
new file mode 100644
index 000000000000..2745c3f7b92c
--- /dev/null
+++ b/tests/core/policies/test_ted_policy.py
@@ -0,0 +1,46 @@
+from pathlib import Path
+from typing import List, Text
+
+import numpy as np
+import pytest
+from rasa.core.policies.ted_policy import TEDPolicy
+from rasa.shared.core.constants import ACTION_LISTEN_NAME
+from rasa.shared.core.domain import Domain
+from rasa.shared.core.events import ActionExecuted, UserUttered
+from rasa.shared.core.trackers import DialogueStateTracker
+from rasa.shared.nlu.interpreter import RegexInterpreter
+
+UTTER_GREET_ACTION = "utter_greet"
+GREET_INTENT_NAME = "greet"
+DOMAIN_YAML = f"""
+intents:
+- {GREET_INTENT_NAME}
+actions:
+- {UTTER_GREET_ACTION}
+"""
+
+
+def test_diagnostics():
+    domain = Domain.from_yaml(DOMAIN_YAML)
+    policy = TEDPolicy()
+    GREET_RULE = DialogueStateTracker.from_events(
+        "greet rule",
+        evts=[
+            UserUttered(intent={"name": GREET_INTENT_NAME}),
+            ActionExecuted(UTTER_GREET_ACTION),
+            ActionExecuted(ACTION_LISTEN_NAME),
+            UserUttered(intent={"name": GREET_INTENT_NAME}),
+            ActionExecuted(ACTION_LISTEN_NAME),
+        ],
+    )
+    policy.train([GREET_RULE], domain, RegexInterpreter())
+    (
+        action_probabilities,
+        diagnostic_data,
+    ) = policy.predict_action_probabilities(
+        GREET_RULE, domain, RegexInterpreter()
+    )
+
+    assert isinstance(diagnostic_data, dict)
+    assert "attention_weights" in diagnostic_data
+    assert isinstance(diagnostic_data.get("attention_weights"), np.ndarray)
diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py
index c09db3a20e9c..492a97572df1 100644
--- a/tests/core/test_policies.py
+++ b/tests/core/test_policies.py
@@ -131,10 +131,10 @@ async def test_persist_and_load(
         trackers = await train_trackers(default_domain, augmentation_factor=20)
 
         for tracker in trackers:
-            predicted_probabilities = loaded.predict_action_probabilities(
+            predicted_probabilities, _ = loaded.predict_action_probabilities(
                 tracker, default_domain, RegexInterpreter()
             )
-            actual_probabilities = trained_policy.predict_action_probabilities(
+            actual_probabilities, _ = trained_policy.predict_action_probabilities(
                 tracker, default_domain, RegexInterpreter()
             )
             assert predicted_probabilities == actual_probabilities
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index f4c6cf5362a8..043abc02f367 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -447,7 +447,9 @@ async def test_train_persist_load_with_composite_entities(
     assert loaded.parse(text) == trained.parse(text)
 
 
-async def test_process_gives_diagnostic_data(component_builder: ComponentBuilder, tmpdir: Path):
+async def test_process_gives_diagnostic_data(
+    component_builder: ComponentBuilder, tmpdir: Path
+):
     """Test if processing a message returns attention weights as numpy array"""
 
     _config = RasaNLUModelConfig(

From 06319dd949d16d5a08c3bd77a1ff92c133f52ede Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Mon, 16 Nov 2020 18:18:15 +0100
Subject: [PATCH 022/102] Apply BLACK formatting

---
 rasa/core/policies/ted_policy.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index aef31b4d05aa..e4c1b3e65676 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -397,7 +397,10 @@ def predict_action_probabilities(
         if self.config[LOSS_TYPE] == SOFTMAX and self.config[RANKING_LENGTH] > 0:
             confidence = train_utils.normalize(confidence, self.config[RANKING_LENGTH])
 
-        return self._prediction(confidence.tolist(), diagnostic_data=values_to_numpy(output.get(DIAGNOSTIC_DATA)))
+        return self._prediction(
+            confidence.tolist(),
+            diagnostic_data=values_to_numpy(output.get(DIAGNOSTIC_DATA)),
+        )
 
     def persist(self, path: Union[Text, Path]) -> None:
         """Persists the policy to a storage."""

From 08b9f71017c7f088b188cc2b1c468235ab3567fb Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Mon, 16 Nov 2020 18:25:13 +0100
Subject: [PATCH 023/102] Add doc-string

---
 rasa/utils/tensorflow/tf_to_numpy.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/rasa/utils/tensorflow/tf_to_numpy.py b/rasa/utils/tensorflow/tf_to_numpy.py
index c09541af6414..8f034a32eb75 100644
--- a/rasa/utils/tensorflow/tf_to_numpy.py
+++ b/rasa/utils/tensorflow/tf_to_numpy.py
@@ -3,6 +3,15 @@
 
 
 def values_to_numpy(data: Optional[Dict[Any, Any]]) -> Optional[Dict[Any, Any]]:
+    """Replace all tensorflow-tensor values with their numpy versions
+
+    Args:
+        data: Any dictionary for which values should be converted.
+
+    Returns:
+        A dictionary identical to `data` except that tensor values are 
+        replaced by their corresponding numpy arrays.
+    """
     if not data:
         return data
 

From 9c393ee27bfc944ca0b90aebd7266cb61163e40e Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Mon, 16 Nov 2020 18:46:56 +0100
Subject: [PATCH 024/102] Fix tests

---
 tests/core/policies/test_ted_policy.py | 11 ++++-------
 tests/core/test_policies.py            |  4 ++--
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/tests/core/policies/test_ted_policy.py b/tests/core/policies/test_ted_policy.py
index 2745c3f7b92c..be5aaa369f79 100644
--- a/tests/core/policies/test_ted_policy.py
+++ b/tests/core/policies/test_ted_policy.py
@@ -34,13 +34,10 @@ def test_diagnostics():
         ],
     )
     policy.train([GREET_RULE], domain, RegexInterpreter())
-    (
-        action_probabilities,
-        diagnostic_data,
-    ) = policy.predict_action_probabilities(
+    prediction = policy.predict_action_probabilities(
         GREET_RULE, domain, RegexInterpreter()
     )
 
-    assert isinstance(diagnostic_data, dict)
-    assert "attention_weights" in diagnostic_data
-    assert isinstance(diagnostic_data.get("attention_weights"), np.ndarray)
+    assert prediction.diagnostic_data
+    assert "attention_weights" in prediction.diagnostic_data
+    assert isinstance(prediction.diagnostic_data.get("attention_weights"), np.ndarray)
diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py
index 752ac4cedff2..b1a65f7e5d0f 100644
--- a/tests/core/test_policies.py
+++ b/tests/core/test_policies.py
@@ -144,10 +144,10 @@ async def test_persist_and_load(
         trackers = await train_trackers(default_domain, augmentation_factor=20)
 
         for tracker in trackers:
-            predicted_probabilities, _ = loaded.predict_action_probabilities(
+            predicted_probabilities = loaded.predict_action_probabilities(
                 tracker, default_domain, RegexInterpreter()
             )
-            actual_probabilities, _ = trained_policy.predict_action_probabilities(
+            actual_probabilities = trained_policy.predict_action_probabilities(
                 tracker, default_domain, RegexInterpreter()
             )
             assert predicted_probabilities == actual_probabilities

From c3c8bdec7f00fc0c0921e542c3f4b25fc4ec1597 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Mon, 16 Nov 2020 18:52:43 +0100
Subject: [PATCH 025/102] Add changelog

---
 changelog/5673.improvement.md | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 changelog/5673.improvement.md

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
new file mode 100644
index 000000000000..56f981440b96
--- /dev/null
+++ b/changelog/5673.improvement.md
@@ -0,0 +1,4 @@
+Return diagnostic data for action and NLU predictions.
+
+DIET and TED now both expose the attention weights of their transformer layers.
+This can be used for debugging and fine-tuning, e.g. with RasaLit.

From 2d4776de949c4f882d977bf7963e5e430221f1e1 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Mon, 16 Nov 2020 19:09:47 +0100
Subject: [PATCH 026/102] Add `text_transformed` diagnostic

---
 rasa/nlu/classifiers/diet_classifier.py       | 5 ++++-
 tests/nlu/classifiers/test_diet_classifier.py | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index a4aee69f0a94..30e4d61bba29 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -1772,7 +1772,10 @@ def batch_predict(
 
         predictions: Dict[Text, tf.Tensor] = {}
 
-        predictions[DIAGNOSTIC_DATA] = {"attention_weights": attention_weights}
+        predictions[DIAGNOSTIC_DATA] = {
+            "attention_weights": attention_weights,
+            "text_transformed": text_transformed,
+        }
 
         if self.config[INTENT_CLASSIFICATION]:
             predictions.update(
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 043abc02f367..29cab891bebc 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -484,3 +484,5 @@ async def test_process_gives_diagnostic_data(
     assert isinstance(diagnostic_data, dict)
     assert "attention_weights" in diagnostic_data
     assert isinstance(diagnostic_data.get("attention_weights"), np.ndarray)
+    assert "text_transformed" in diagnostic_data
+    assert isinstance(diagnostic_data.get("text_transformed"), np.ndarray)

From 44d33e825e68cbcfbac8298798717b0b49c98d1c Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Mon, 16 Nov 2020 19:22:23 +0100
Subject: [PATCH 027/102] Remove trailing whitespaces

---
 rasa/core/policies/policy.py         | 2 +-
 rasa/utils/tensorflow/tf_to_numpy.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py
index dfeb02b261a2..5b3c04cb7485 100644
--- a/rasa/core/policies/policy.py
+++ b/rasa/core/policies/policy.py
@@ -390,7 +390,7 @@ def __init__(
             is_end_to_end_prediction: `True` if the prediction used the text of the
                 user message instead of the intent.
             diagnostic_data: Intermediate results or other information that is not
-                necessary for Rasa to function, but intended for debuging and 
+                necessary for Rasa to function, but intended for debuging and
                 fine-tuning purposes.
         """
         self.probabilities = probabilities
diff --git a/rasa/utils/tensorflow/tf_to_numpy.py b/rasa/utils/tensorflow/tf_to_numpy.py
index 8f034a32eb75..57d096bd2c7f 100644
--- a/rasa/utils/tensorflow/tf_to_numpy.py
+++ b/rasa/utils/tensorflow/tf_to_numpy.py
@@ -9,7 +9,7 @@ def values_to_numpy(data: Optional[Dict[Any, Any]]) -> Optional[Dict[Any, Any]]:
         data: Any dictionary for which values should be converted.
 
     Returns:
-        A dictionary identical to `data` except that tensor values are 
+        A dictionary identical to `data` except that tensor values are
         replaced by their corresponding numpy arrays.
     """
     if not data:

From 891fdcd75c82a22980152bfc539bf5bf4a58a391 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 17 Nov 2020 10:24:38 +0100
Subject: [PATCH 028/102] Code formatting

---
 rasa/core/policies/ted_policy.py              | 2 --
 rasa/nlu/classifiers/diet_classifier.py       | 2 --
 rasa/nlu/components.py                        | 8 --------
 rasa/utils/tensorflow/models.py               | 1 -
 rasa/utils/tensorflow/tf_to_numpy.py          | 2 +-
 rasa/utils/tensorflow/transformer.py          | 1 -
 tests/nlu/classifiers/test_diet_classifier.py | 2 +-
 7 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index e4c1b3e65676..c5e5d4c9c643 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -670,7 +670,6 @@ def _emebed_dialogue(
         self, dialogue_in: tf.Tensor, sequence_lengths: tf.Tensor
     ) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor]]:
         """Create dialogue level embedding and mask."""
-
         mask = self._compute_mask(sequence_lengths)
 
         dialogue_transformed, attention_weights = self._tf_layers[
@@ -700,7 +699,6 @@ def _encode_features_per_attribute(
         Returns:
             A tensor combining  all features for `attribute`
         """
-
         if not tf_batch_data[attribute]:
             return None
 
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 30e4d61bba29..f5c5e5074a0a 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -896,7 +896,6 @@ def _entity_label_to_tags(
 
     def process(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
         """Augment the message with intents and entities and return diagnostic data."""
-
         out = self._predict(message)
 
         if self.component_config[INTENT_CLASSIFICATION]:
@@ -917,7 +916,6 @@ def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
 
         Return the metadata necessary to load the model again.
         """
-
         if self.model is None:
             return {"file": None}
 
diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 0d5e173c8664..636355e5c638 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -463,7 +463,6 @@ def required_packages(cls) -> List[Text]:
         Returns:
             The list of required package names.
         """
-
         return []
 
     @classmethod
@@ -493,7 +492,6 @@ def load(
         Returns:
             the loaded component
         """
-
         if cached_component:
             return cached_component
 
@@ -514,7 +512,6 @@ def create(
         Returns:
             The created component.
         """
-
         # Check language supporting
         language = config.language
         if not cls.can_handle_language(language):
@@ -539,7 +536,6 @@ def provide_context(self) -> Optional[Dict[Text, Any]]:
         Returns:
             The updated component configuration.
         """
-
         pass
 
     def train(
@@ -565,7 +561,6 @@ def train(
             config: The model configuration parameters.
 
         """
-
         pass
 
     def process(self, message: Message, **kwargs: Any) -> Dict[Text, Any]:
@@ -587,7 +582,6 @@ def process(self, message: Message, **kwargs: Any) -> Dict[Text, Any]:
             A dictionary of diagnostic data such as attention weights, or None.
 
         """
-
         pass
 
     def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
@@ -600,7 +594,6 @@ def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]
         Returns:
             An optional dictionary with any information about the stored model.
         """
-
         pass
 
     @classmethod
@@ -621,7 +614,6 @@ def cache_key(
         Returns:
             A unique caching key.
         """
-
         return None
 
     def __getstate__(self) -> Any:
diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py
index 2e2261dd670c..e711fbf87483 100644
--- a/rasa/utils/tensorflow/models.py
+++ b/rasa/utils/tensorflow/models.py
@@ -519,7 +519,6 @@ def batch_to_model_data_format(
         data, shape) before, this method converts them into sparse tensors. Dense
         data is kept.
         """
-
         batch_data = defaultdict(lambda: defaultdict(list))
 
         idx = 0
diff --git a/rasa/utils/tensorflow/tf_to_numpy.py b/rasa/utils/tensorflow/tf_to_numpy.py
index 57d096bd2c7f..19dd45de3c5b 100644
--- a/rasa/utils/tensorflow/tf_to_numpy.py
+++ b/rasa/utils/tensorflow/tf_to_numpy.py
@@ -3,7 +3,7 @@
 
 
 def values_to_numpy(data: Optional[Dict[Any, Any]]) -> Optional[Dict[Any, Any]]:
-    """Replace all tensorflow-tensor values with their numpy versions
+    """Replace all tensorflow-tensor values with their numpy versions.
 
     Args:
         data: Any dictionary for which values should be converted.
diff --git a/rasa/utils/tensorflow/transformer.py b/rasa/utils/tensorflow/transformer.py
index 67ea7465cd6e..4c0332a08aff 100644
--- a/rasa/utils/tensorflow/transformer.py
+++ b/rasa/utils/tensorflow/transformer.py
@@ -606,7 +606,6 @@ def call(
         Returns:
             Transformer encoder output with shape [batch_size, length, units]
         """
-
         # adding embedding and position encoding.
         x = self._embedding(x)  # (batch_size, length, units)
         x *= tf.math.sqrt(tf.cast(self.units, tf.float32))
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 29cab891bebc..f1d6dc4128c4 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -450,7 +450,7 @@ async def test_train_persist_load_with_composite_entities(
 async def test_process_gives_diagnostic_data(
     component_builder: ComponentBuilder, tmpdir: Path
 ):
-    """Test if processing a message returns attention weights as numpy array"""
+    """Test if processing a message returns attention weights as numpy array."""
 
     _config = RasaNLUModelConfig(
         {

From 871e765d7677a9d8f16c04c67afcc38da38a8e2a Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 17 Nov 2020 11:03:27 +0100
Subject: [PATCH 029/102] Don't compare diagnostic data for eq

---
 rasa/core/policies/policy.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py
index 5b3c04cb7485..fa7211a78304 100644
--- a/rasa/core/policies/policy.py
+++ b/rasa/core/policies/policy.py
@@ -399,7 +399,7 @@ def __init__(
         self.events = events or []
         self.optional_events = optional_events or []
         self.is_end_to_end_prediction = is_end_to_end_prediction
-        self.diagnostic_data = diagnostic_data
+        self.diagnostic_data = diagnostic_data or dict()
 
     @staticmethod
     def for_action_name(
@@ -442,7 +442,6 @@ def __eq__(self, other: Any) -> bool:
             and self.events == other.events
             and self.optional_events == other.events
             and self.is_end_to_end_prediction == other.is_end_to_end_prediction
-            and self.diagnostic_data == other.diagnostic_data
         )
 
     @property

From 09eae2adb5ba6059d08fc56139edfd66299a2fe9 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 17 Nov 2020 11:24:43 +0100
Subject: [PATCH 030/102] Fix equality for `diagnostic_data`

---
 rasa/core/policies/policy.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py
index fa7211a78304..c1917ec1205a 100644
--- a/rasa/core/policies/policy.py
+++ b/rasa/core/policies/policy.py
@@ -435,6 +435,21 @@ def __eq__(self, other: Any) -> bool:
         if not isinstance(other, PolicyPrediction):
             return False
 
+        # The `diagnostic_data` values can be numpy arrays, so we have to check
+        # these separately
+        for key, value in self.diagnostic_data.items():
+            if key not in other.diagnostic_data:
+                return False
+            if isinstance(value, np.ndarray) and not np.array_equal(
+                value, other.diagnostic_data[key]
+            ):
+                return False
+            elif (
+                not isinstance(value, np.ndarray)
+                and value != other.diagnostic_data[key]
+            ):
+                return False
+
         return (
             self.probabilities == other.probabilities
             and self.policy_name == other.policy_name

From 04e529fea2a96a557d4e6e18136aa52071467d72 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 17 Nov 2020 11:44:19 +0100
Subject: [PATCH 031/102] Add a docstring

---
 rasa/nlu/components.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 636355e5c638..cde452483514 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -617,6 +617,7 @@ def cache_key(
         return None
 
     def __getstate__(self) -> Any:
+        """Get a copy of picklable parts of the component."""
         d = self.__dict__.copy()
         # these properties should not be pickled
         if "partial_processing_context" in d:

From 693781fad7d68de4f4732ef01eb27e42afdde5da Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 18 Nov 2020 10:08:14 +0100
Subject: [PATCH 032/102] Fix response selector

---
 rasa/nlu/selectors/response_selector.py | 18 ++++++++++++++----
 rasa/utils/tensorflow/models.py         |  2 +-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py
index 59b8d3b29276..06ced2e93ea4 100644
--- a/rasa/nlu/selectors/response_selector.py
+++ b/rasa/nlu/selectors/response_selector.py
@@ -6,6 +6,8 @@
 
 from typing import Any, Dict, Optional, Text, Tuple, Union, List, Type
 
+from rasa.constants import DIAGNOSTIC_DATA
+from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
 from rasa.shared.nlu.training_data import util
 import rasa.shared.utils.io
 from rasa.shared.exceptions import InvalidConfigException
@@ -374,7 +376,7 @@ def _resolve_intent_response_key(
                     return search_key
         return None
 
-    def process(self, message: Message, **kwargs: Any) -> None:
+    def process(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
         """Return the most likely response, the associated intent_response_key and its similarity to the input."""
 
         out = self._predict(message)
@@ -434,6 +436,8 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
         self._set_message_property(message, prediction_dict, selector_key)
 
+        return values_to_numpy(out.get(DIAGNOSTIC_DATA)) if out else None
+
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
 
@@ -616,7 +620,7 @@ def _create_all_labels(self) -> Tuple[tf.Tensor, tf.Tensor]:
         )
         mask_label = self._compute_mask(sequence_lengths_label)
 
-        label_transformed, _, _, _ = self._create_sequence(
+        label_transformed, _, _, _, _ = self._create_sequence(
             self.tf_label_data[LABEL][SEQUENCE],
             self.tf_label_data[LABEL][SENTENCE],
             sequence_mask_label,
@@ -646,6 +650,7 @@ def batch_loss(
             text_in,
             text_seq_ids,
             lm_mask_bool_text,
+            _,
         ) = self._create_sequence(
             tf_batch_data[TEXT][SEQUENCE],
             tf_batch_data[TEXT][SENTENCE],
@@ -666,7 +671,7 @@ def batch_loss(
         )
         mask_label = self._compute_mask(sequence_lengths_label)
 
-        label_transformed, _, _, _ = self._create_sequence(
+        label_transformed, _, _, _, _ = self._create_sequence(
             tf_batch_data[LABEL][SEQUENCE],
             tf_batch_data[LABEL][SENTENCE],
             sequence_mask_label,
@@ -718,7 +723,7 @@ def batch_predict(
         )
         mask_text = self._compute_mask(sequence_lengths_text)
 
-        text_transformed, _, _, _ = self._create_sequence(
+        text_transformed, _, _, _, attention_weights = self._create_sequence(
             tf_batch_data[TEXT][SEQUENCE],
             tf_batch_data[TEXT][SENTENCE],
             sequence_mask_text,
@@ -728,6 +733,11 @@ def batch_predict(
 
         out = {}
 
+        out[DIAGNOSTIC_DATA] = {
+            "attention_weights": attention_weights,
+            "text_transformed": text_transformed,
+        }
+
         if self.all_labels_embed is None:
             _, self.all_labels_embed = self._create_all_labels()
 
diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py
index e711fbf87483..8d549877cd1d 100644
--- a/rasa/utils/tensorflow/models.py
+++ b/rasa/utils/tensorflow/models.py
@@ -721,7 +721,7 @@ def _prepare_transformer_layer(
             )
         else:
             # create lambda so that it can be used later without the check
-            self._tf_layers[f"{prefix}.{name}"] = lambda x, mask, training: x, None
+            self._tf_layers[f"{prefix}.{name}"] = lambda x, mask, training: (x, None)
 
     def _prepare_dot_product_loss(
         self, name: Text, scale_loss: bool, prefix: Text = "loss"

From 9504e8368ad5b31aa408af5d3ec086e32fb4cf6a Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 18 Nov 2020 10:27:17 +0100
Subject: [PATCH 033/102] Remove newline after doc string

---
 rasa/nlu/selectors/response_selector.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py
index 06ced2e93ea4..fd72f497ac29 100644
--- a/rasa/nlu/selectors/response_selector.py
+++ b/rasa/nlu/selectors/response_selector.py
@@ -378,7 +378,6 @@ def _resolve_intent_response_key(
 
     def process(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
         """Return the most likely response, the associated intent_response_key and its similarity to the input."""
-
         out = self._predict(message)
         top_label, label_ranking = self._predict_label(out)
 

From 448b12fd0524c244b2ec55a5c1166a1d00dc9bfd Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 25 Nov 2020 13:59:01 +0100
Subject: [PATCH 034/102] Add diagnostic_data to message

---
 rasa/nlu/classifiers/diet_classifier.py | 7 ++++---
 rasa/nlu/components.py                  | 8 ++------
 rasa/nlu/selectors/response_selector.py | 5 +++--
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index f5c5e5074a0a..4662e15a2a46 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -894,8 +894,8 @@ def _entity_label_to_tags(
 
         return predicted_tags, confidence_values
 
-    def process(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
-        """Augment the message with intents and entities and return diagnostic data."""
+    def process(self, message: Message, **kwargs: Any) -> None:
+        """Augment the message with intents, entities, and diagnostic data."""
         out = self._predict(message)
 
         if self.component_config[INTENT_CLASSIFICATION]:
@@ -909,7 +909,8 @@ def process(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
 
             message.set(ENTITIES, entities, add_to_output=True)
 
-        return values_to_numpy(out.get(DIAGNOSTIC_DATA)) if out else None
+        if out:
+            message.set(DIAGNOSTIC_DATA, values_to_numpy(out.get(DIAGNOSTIC_DATA)))
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index cde452483514..cf4980b52dfb 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -563,8 +563,8 @@ def train(
         """
         pass
 
-    def process(self, message: Message, **kwargs: Any) -> Dict[Text, Any]:
-        """Process an incoming message and return diagnostic data.
+    def process(self, message: Message, **kwargs: Any) -> None:
+        """Process an incoming message.
 
         This is the components chance to process an incoming
         message. The component can rely on
@@ -577,10 +577,6 @@ def process(self, message: Message, **kwargs: Any) -> Dict[Text, Any]:
 
         Args:
             message: The :class:`rasa.shared.nlu.training_data.message.Message` to process.
-
-        Returns:
-            A dictionary of diagnostic data such as attention weights, or None.
-
         """
         pass
 
diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py
index fd72f497ac29..613e61fc5c34 100644
--- a/rasa/nlu/selectors/response_selector.py
+++ b/rasa/nlu/selectors/response_selector.py
@@ -376,7 +376,7 @@ def _resolve_intent_response_key(
                     return search_key
         return None
 
-    def process(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
+    def process(self, message: Message, **kwargs: Any) -> None:
         """Return the most likely response, the associated intent_response_key and its similarity to the input."""
         out = self._predict(message)
         top_label, label_ranking = self._predict_label(out)
@@ -435,7 +435,8 @@ def process(self, message: Message, **kwargs: Any) -> Optional[Dict[Text, Any]]:
 
         self._set_message_property(message, prediction_dict, selector_key)
 
-        return values_to_numpy(out.get(DIAGNOSTIC_DATA)) if out else None
+        if out:
+            message.set(DIAGNOSTIC_DATA, values_to_numpy(out.get(DIAGNOSTIC_DATA)))
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.

From 00c5cf0e5a9ebaface45954a899a489ab1946973 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Wed, 25 Nov 2020 17:56:11 +0100
Subject: [PATCH 035/102] Fix test

---
 tests/nlu/classifiers/test_diet_classifier.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index f1d6dc4128c4..101ea8e4749b 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -32,6 +32,7 @@
 from rasa.nlu.model import Interpreter
 from rasa.shared.nlu.training_data.message import Message
 from rasa.utils import train_utils
+from rasa.constants import DIAGNOSTIC_DATA
 from tests.conftest import DEFAULT_NLU_DATA
 from tests.nlu.conftest import DEFAULT_DATA_PATH
 
@@ -476,11 +477,12 @@ async def test_process_gives_diagnostic_data(
     loaded = Interpreter.load(persisted_path, component_builder)
 
     message = Message(data={TEXT: "hello"})
-    diagnostic_data = None
     for component in loaded.pipeline:
-        diagnostic_data = component.process(message)
+        component.process(message)
 
-    # The last component is DIETClassifier, which should return attention weights
+    diagnostic_data = message.get(DIAGNOSTIC_DATA)
+
+    # The last component is DIETClassifier, which should add attention weights
     assert isinstance(diagnostic_data, dict)
     assert "attention_weights" in diagnostic_data
     assert isinstance(diagnostic_data.get("attention_weights"), np.ndarray)

From 12f50dad3634d12e52c44a81e2d1c998f50659ee Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 26 Nov 2020 09:05:23 +0100
Subject: [PATCH 036/102] Handle diagnostic_data from multiple components

---
 rasa/nlu/classifiers/diet_classifier.py       |  4 ++--
 rasa/nlu/selectors/response_selector.py       |  4 ++--
 rasa/shared/nlu/training_data/message.py      | 12 ++++++++++++
 tests/nlu/classifiers/test_diet_classifier.py | 12 +++++++-----
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 4662e15a2a46..1005e460d1cc 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -909,8 +909,8 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
             message.set(ENTITIES, entities, add_to_output=True)
 
-        if out:
-            message.set(DIAGNOSTIC_DATA, values_to_numpy(out.get(DIAGNOSTIC_DATA)))
+        if out and DIAGNOSTIC_DATA in out:
+            message.add_diagnostic_data(self.name, out.get(DIAGNOSTIC_DATA))
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py
index 613e61fc5c34..2309a95bef84 100644
--- a/rasa/nlu/selectors/response_selector.py
+++ b/rasa/nlu/selectors/response_selector.py
@@ -435,8 +435,8 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
         self._set_message_property(message, prediction_dict, selector_key)
 
-        if out:
-            message.set(DIAGNOSTIC_DATA, values_to_numpy(out.get(DIAGNOSTIC_DATA)))
+        if out and DIAGNOSTIC_DATA in out:
+            message.add_diagnostic_data(self.name, out.get(DIAGNOSTIC_DATA))
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index 20d36341d980..c2f6f6b15337 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -21,6 +21,8 @@
     ACTION_TEXT,
     ACTION_NAME,
 )
+from rasa.constants import DIAGNOSTIC_DATA
+from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
 
 if typing.TYPE_CHECKING:
     from rasa.shared.nlu.training_data.features import Features
@@ -51,6 +53,16 @@ def add_features(self, features: Optional["Features"]) -> None:
         if features is not None:
             self.features.append(features)
 
+    def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
+        if origin in self.get(DIAGNOSTIC_DATA, {}):
+            rasa.shared.utils.io.raise_warning(
+                f"Please make sure every pipeline component has a distinct name. "
+                f"The name '{self.name}' appears at least twice and diagnostic data will be overwritten."
+            )
+        self.set(
+            DIAGNOSTIC_DATA, {origin: values_to_numpy(data)}
+        )
+
     def set(self, prop, info, add_to_output=False) -> None:
         self.data[prop] = info
         if add_to_output:
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 101ea8e4749b..73145a33531d 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -453,12 +453,13 @@ async def test_process_gives_diagnostic_data(
 ):
     """Test if processing a message returns attention weights as numpy array."""
 
+    _name = "DIETClassifier"
     _config = RasaNLUModelConfig(
         {
             "pipeline": [
                 {"name": "WhitespaceTokenizer"},
                 {"name": "CountVectorsFeaturizer"},
-                {"name": "DIETClassifier", RANDOM_SEED: 1, EPOCHS: 1},
+                {"name": _name, RANDOM_SEED: 1, EPOCHS: 1},
             ],
             "language": "en",
         }
@@ -484,7 +485,8 @@ async def test_process_gives_diagnostic_data(
 
     # The last component is DIETClassifier, which should add attention weights
     assert isinstance(diagnostic_data, dict)
-    assert "attention_weights" in diagnostic_data
-    assert isinstance(diagnostic_data.get("attention_weights"), np.ndarray)
-    assert "text_transformed" in diagnostic_data
-    assert isinstance(diagnostic_data.get("text_transformed"), np.ndarray)
+    assert _name in diagnostic_data
+    assert "attention_weights" in diagnostic_data[_name]
+    assert isinstance(diagnostic_data[_name].get("attention_weights"), np.ndarray)
+    assert "text_transformed" in diagnostic_data[_name]
+    assert isinstance(diagnostic_data[_name].get("text_transformed"), np.ndarray)

From 5dd09b7cb8748de28fa20c16739f35b8199e677c Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 26 Nov 2020 11:05:56 +0100
Subject: [PATCH 037/102] Add doc string

---
 rasa/shared/nlu/training_data/message.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index c2f6f6b15337..a0b8f8dff0e3 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -54,14 +54,17 @@ def add_features(self, features: Optional["Features"]) -> None:
             self.features.append(features)
 
     def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
+        """Add diagnostic data from the component with name `origin`."""
         if origin in self.get(DIAGNOSTIC_DATA, {}):
             rasa.shared.utils.io.raise_warning(
                 f"Please make sure every pipeline component has a distinct name. "
-                f"The name '{self.name}' appears at least twice and diagnostic data will be overwritten."
+                f"The name '{self.name}' appears at least twice and diagnostic "
+                f"data will be overwritten."
             )
-        self.set(
-            DIAGNOSTIC_DATA, {origin: values_to_numpy(data)}
-        )
+        if DIAGNOSTIC_DATA in self.data:
+            self.data[DIAGNOSTIC_DATA][origin] = values_to_numpy(data)
+        else:
+            self.data[DIAGNOSTIC_DATA] = {origin: values_to_numpy(data)}
 
     def set(self, prop, info, add_to_output=False) -> None:
         self.data[prop] = info

From 4593e3a6909972bacc43407e17cf4d13fd247d47 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 26 Nov 2020 11:38:54 +0100
Subject: [PATCH 038/102] Move message stuff to shared

---
 rasa/constants.py                             |  2 -
 rasa/core/policies/ted_policy.py              |  4 +-
 rasa/nlu/classifiers/diet_classifier.py       |  4 +-
 rasa/nlu/selectors/response_selector.py       |  4 +-
 rasa/shared/constants.py                      |  2 +
 rasa/shared/nlu/training_data/message.py      |  4 +-
 .../utils/tensorflow/tf_to_numpy.py           |  0
 tests/nlu/classifiers/test_diet_classifier.py |  2 +-
 tests/nlu/selectors/test_selectors.py         | 54 +++++++++++++++++++
 9 files changed, 65 insertions(+), 11 deletions(-)
 rename rasa/{ => shared}/utils/tensorflow/tf_to_numpy.py (100%)

diff --git a/rasa/constants.py b/rasa/constants.py
index c237da506e46..1661d0096a82 100644
--- a/rasa/constants.py
+++ b/rasa/constants.py
@@ -39,5 +39,3 @@
 ENV_GPU_CONFIG = "TF_GPU_MEMORY_ALLOC"
 ENV_CPU_INTER_OP_CONFIG = "TF_INTER_OP_PARALLELISM_THREADS"
 ENV_CPU_INTRA_OP_CONFIG = "TF_INTRA_OP_PARALLELISM_THREADS"
-
-DIAGNOSTIC_DATA = "diagnostic_data"
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index c5e5d4c9c643..1a0fc1f5e116 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -22,7 +22,7 @@
 from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter
 from rasa.core.policies.policy import Policy, PolicyPrediction
 from rasa.core.constants import DEFAULT_POLICY_PRIORITY, DIALOGUE
-from rasa.constants import DIAGNOSTIC_DATA
+from rasa.shared.constants import DIAGNOSTIC_DATA
 from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.core.generator import TrackerWithCachedStates
@@ -30,7 +30,7 @@
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
 from rasa.utils.tensorflow.model_data_utils import convert_to_data_format
-from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
+from rasa.shared.utils.tensorflow.tf_to_numpy import values_to_numpy
 from rasa.utils.tensorflow.constants import (
     LABEL,
     TRANSFORMER_SIZE,
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 1005e460d1cc..82cf1d0adc0a 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -14,7 +14,7 @@
 import rasa.shared.utils.io
 import rasa.utils.io as io_utils
 import rasa.nlu.utils.bilou_utils as bilou_utils
-from rasa.constants import DIAGNOSTIC_DATA
+from rasa.shared.constants import DIAGNOSTIC_DATA
 from rasa.nlu.featurizers.featurizer import Featurizer
 from rasa.nlu.components import Component
 from rasa.nlu.classifiers.classifier import IntentClassifier
@@ -25,7 +25,7 @@
 from rasa.utils.tensorflow import layers
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
-from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
+from rasa.shared.utils.tensorflow.tf_to_numpy import values_to_numpy
 from rasa.nlu.constants import TOKENS_NAMES
 from rasa.shared.nlu.constants import (
     TEXT,
diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py
index 2309a95bef84..72461e2cacdc 100644
--- a/rasa/nlu/selectors/response_selector.py
+++ b/rasa/nlu/selectors/response_selector.py
@@ -6,8 +6,8 @@
 
 from typing import Any, Dict, Optional, Text, Tuple, Union, List, Type
 
-from rasa.constants import DIAGNOSTIC_DATA
-from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
+from rasa.shared.constants import DIAGNOSTIC_DATA
+from rasa.shared.utils.tensorflow.tf_to_numpy import values_to_numpy
 from rasa.shared.nlu.training_data import util
 import rasa.shared.utils.io
 from rasa.shared.exceptions import InvalidConfigException
diff --git a/rasa/shared/constants.py b/rasa/shared/constants.py
index 4c240bef3de0..cbc17e7c1408 100644
--- a/rasa/shared/constants.py
+++ b/rasa/shared/constants.py
@@ -73,3 +73,5 @@
 DEFAULT_NLU_RESULTS_PATH = "nlu_comparison_results"
 DEFAULT_CORE_SUBDIRECTORY_NAME = "core"
 DEFAULT_NLU_SUBDIRECTORY_NAME = "nlu"
+
+DIAGNOSTIC_DATA = "diagnostic_data"
diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index a0b8f8dff0e3..11a65cf98d74 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -21,8 +21,8 @@
     ACTION_TEXT,
     ACTION_NAME,
 )
-from rasa.constants import DIAGNOSTIC_DATA
-from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
+from rasa.shared.constants import DIAGNOSTIC_DATA
+from rasa.shared.utils.tensorflow.tf_to_numpy import values_to_numpy
 
 if typing.TYPE_CHECKING:
     from rasa.shared.nlu.training_data.features import Features
diff --git a/rasa/utils/tensorflow/tf_to_numpy.py b/rasa/shared/utils/tensorflow/tf_to_numpy.py
similarity index 100%
rename from rasa/utils/tensorflow/tf_to_numpy.py
rename to rasa/shared/utils/tensorflow/tf_to_numpy.py
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 73145a33531d..bd2dd8a3773a 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -32,7 +32,7 @@
 from rasa.nlu.model import Interpreter
 from rasa.shared.nlu.training_data.message import Message
 from rasa.utils import train_utils
-from rasa.constants import DIAGNOSTIC_DATA
+from rasa.shared.constants import DIAGNOSTIC_DATA
 from tests.conftest import DEFAULT_NLU_DATA
 from tests.nlu.conftest import DEFAULT_DATA_PATH
 
diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index d25d28c0367e..5a7ffe5f6b97 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 
 import pytest
+import numpy as np
 
 from rasa.nlu import train
 from rasa.nlu.components import ComponentBuilder
@@ -17,6 +18,8 @@
     EVAL_NUM_EXAMPLES,
     CHECKPOINT_MODEL,
 )
+from rasa.shared.nlu.constants import TEXT
+from rasa.shared.constants import DIAGNOSTIC_DATA
 from rasa.nlu.selectors.response_selector import ResponseSelector
 from rasa.shared.nlu.training_data.message import Message
 from rasa.shared.nlu.training_data.training_data import TrainingData
@@ -238,3 +241,54 @@ async def test_train_model_checkpointing(
     """
     all_files = list(best_model_file.rglob("*.*"))
     assert len(all_files) > 4
+
+
+async def test_process_gives_diagnostic_data(
+    component_builder: ComponentBuilder, tmpdir: Path
+):
+    """Test if processing a message returns attention weights as numpy array."""
+
+    _name = "ResponseSelector"
+    _config = RasaNLUModelConfig(
+        {
+            "pipeline": [
+                {"name": "WhitespaceTokenizer"},
+                {"name": "CountVectorsFeaturizer"},
+                {
+                    "name": _name,
+                    EPOCHS: 1,
+                    EVAL_NUM_EXAMPLES: 10,
+                    EVAL_NUM_EPOCHS: 1,
+                    NUM_TRANSFORMER_LAYERS: 1,
+                    TRANSFORMER_SIZE: 8,
+                },
+            ],
+            "language": "en",
+        }
+    )
+
+    (trainer, trained, persisted_path) = await train(
+        _config,
+        path=tmpdir.strpath,
+        data="data/test_selectors",
+        component_builder=component_builder,
+    )
+
+    assert trainer.pipeline
+    assert trained.pipeline
+
+    loaded = Interpreter.load(persisted_path, component_builder)
+
+    message = Message(data={TEXT: "hello"})
+    for component in loaded.pipeline:
+        component.process(message)
+
+    diagnostic_data = message.get(DIAGNOSTIC_DATA)
+
+    # The last component is DIETClassifier, which should add attention weights
+    assert isinstance(diagnostic_data, dict)
+    assert _name in diagnostic_data
+    assert "attention_weights" in diagnostic_data[_name]
+    assert isinstance(diagnostic_data[_name].get("attention_weights"), np.ndarray)
+    assert "text_transformed" in diagnostic_data[_name]
+    assert isinstance(diagnostic_data[_name].get("text_transformed"), np.ndarray)

From 490280ac499bb8207bb977e1311bdc143d1bc757 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 26 Nov 2020 13:53:33 +0100
Subject: [PATCH 039/102] Add doc string

---
 rasa/shared/nlu/training_data/message.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index 11a65cf98d74..c7c44bbcf91e 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -66,7 +66,12 @@ def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
         else:
             self.data[DIAGNOSTIC_DATA] = {origin: values_to_numpy(data)}
 
-    def set(self, prop, info, add_to_output=False) -> None:
+    def set(self, prop: Text, info: Any, add_to_output=False) -> None:
+        """Set property `prop` to `info`.                
+        Args:
+            prop: Name of the property to be set.
+            info: Value to be assigned to that property.
+        """
         self.data[prop] = info
         if add_to_output:
             self.output_properties.add(prop)

From ae9811c0741bfeaceb5c0c8e888f4cadba3dde23 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 26 Nov 2020 17:35:15 +0100
Subject: [PATCH 040/102] Remove trailing whitespace

---
 rasa/shared/nlu/training_data/message.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index c7c44bbcf91e..362decb9a5cb 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -67,7 +67,7 @@ def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
             self.data[DIAGNOSTIC_DATA] = {origin: values_to_numpy(data)}
 
     def set(self, prop: Text, info: Any, add_to_output=False) -> None:
-        """Set property `prop` to `info`.                
+        """Set property `prop` to `info`.
         Args:
             prop: Name of the property to be set.
             info: Value to be assigned to that property.

From d7f4c8593cff761720690a04eca4e564a8504fe2 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 26 Nov 2020 18:53:11 +0100
Subject: [PATCH 041/102] Add `unique_name` property

---
 rasa/nlu/classifiers/diet_classifier.py       |  2 +-
 rasa/nlu/components.py                        | 10 ++++++++++
 rasa/nlu/config.py                            | 16 ++++++++++++++--
 rasa/nlu/constants.py                         |  2 ++
 rasa/nlu/featurizers/featurizer.py            |  3 ++-
 rasa/nlu/selectors/response_selector.py       |  2 +-
 tests/nlu/classifiers/test_diet_classifier.py | 14 +++++++-------
 tests/nlu/selectors/test_selectors.py         | 14 +++++++-------
 8 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 82cf1d0adc0a..babf21dfd9ae 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -910,7 +910,7 @@ def process(self, message: Message, **kwargs: Any) -> None:
             message.set(ENTITIES, entities, add_to_output=True)
 
         if out and DIAGNOSTIC_DATA in out:
-            message.add_diagnostic_data(self.name, out.get(DIAGNOSTIC_DATA))
+            message.add_diagnostic_data(self.unique_name, out.get(DIAGNOSTIC_DATA))
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index cf4980b52dfb..7b971c564a49 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -11,6 +11,7 @@
 from rasa.shared.exceptions import InvalidConfigException
 from rasa.shared.nlu.training_data.training_data import TrainingData
 from rasa.shared.nlu.training_data.message import Message
+from rasa.nlu.constants import COMPONENT_INDEX
 import rasa.shared.utils.io
 
 if typing.TYPE_CHECKING:
@@ -397,6 +398,15 @@ def name(self) -> Text:
 
         return type(self).name
 
+    # The unique name can be used to distinguish components in
+    # a pipeline, e.g. when the pipeline contains multiple
+    # featurizers of the same type.
+    @property
+    def unique_name(self) -> Text:
+        """Get a unique name for the component in the pipeline."""
+        index = self.component_config.get(COMPONENT_INDEX)
+        return self.name if index is None else str(index) + "_" + self.name
+
     # Which components are required by this component.
     # Listed components should appear before the component itself in the pipeline.
     @classmethod
diff --git a/rasa/nlu/config.py b/rasa/nlu/config.py
index 00678ced09e7..94e1a991d5c0 100644
--- a/rasa/nlu/config.py
+++ b/rasa/nlu/config.py
@@ -13,6 +13,7 @@
     DEFAULT_CONFIG_PATH,
 )
 from rasa.shared.utils.io import json_to_string
+from rasa.nlu.constants import COMPONENT_INDEX
 
 logger = logging.getLogger(__name__)
 
@@ -76,9 +77,20 @@ def component_config_from_pipeline(
     pipeline: List[Dict[Text, Any]],
     defaults: Optional[Dict[Text, Any]] = None,
 ) -> Dict[Text, Any]:
+    """
+    Get the configuration of the `index`th component.
+    Args:
+        index: Index of the component.
+        pipeline: List of component configurations.
+        defaults: Default configuration.
+    Returns:
+        The `index`th component configuration, expanded
+        by the given defaults.
+    """
     try:
-        c = pipeline[index]
-        return override_defaults(defaults, c)
+        configuration = pipeline[index]
+        configuration[COMPONENT_INDEX] = index
+        return override_defaults(defaults, configuration)
     except IndexError:
         rasa.shared.utils.io.raise_warning(
             f"Tried to get configuration value for component "
diff --git a/rasa/nlu/constants.py b/rasa/nlu/constants.py
index 14297822acb3..7ab8858be5f8 100644
--- a/rasa/nlu/constants.py
+++ b/rasa/nlu/constants.py
@@ -78,3 +78,5 @@
 FEATURIZER_CLASS_ALIAS = "alias"
 
 NO_LENGTH_RESTRICTION = -1
+
+COMPONENT_INDEX = "index"
diff --git a/rasa/nlu/featurizers/featurizer.py b/rasa/nlu/featurizers/featurizer.py
index 3fdb63303ee7..8c0462f9a9db 100644
--- a/rasa/nlu/featurizers/featurizer.py
+++ b/rasa/nlu/featurizers/featurizer.py
@@ -12,7 +12,8 @@ def __init__(self, component_config: Optional[Dict[Text, Any]] = None) -> None:
             component_config = {}
 
         # makes sure the alias name is set
-        component_config.setdefault(FEATURIZER_CLASS_ALIAS, self.name)
+        self.component_config = component_config  # Necessary for `unique_name` to be defined
+        component_config.setdefault(FEATURIZER_CLASS_ALIAS, self.unique_name)
 
         super().__init__(component_config)
 
diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py
index 72461e2cacdc..a0c4aa7764d5 100644
--- a/rasa/nlu/selectors/response_selector.py
+++ b/rasa/nlu/selectors/response_selector.py
@@ -436,7 +436,7 @@ def process(self, message: Message, **kwargs: Any) -> None:
         self._set_message_property(message, prediction_dict, selector_key)
 
         if out and DIAGNOSTIC_DATA in out:
-            message.add_diagnostic_data(self.name, out.get(DIAGNOSTIC_DATA))
+            message.add_diagnostic_data(self.unique_name, out.get(DIAGNOSTIC_DATA))
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index bd2dd8a3773a..ebcc01b81984 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -453,13 +453,12 @@ async def test_process_gives_diagnostic_data(
 ):
     """Test if processing a message returns attention weights as numpy array."""
 
-    _name = "DIETClassifier"
     _config = RasaNLUModelConfig(
         {
             "pipeline": [
                 {"name": "WhitespaceTokenizer"},
                 {"name": "CountVectorsFeaturizer"},
-                {"name": _name, RANDOM_SEED: 1, EPOCHS: 1},
+                {"name": "DIETClassifier", RANDOM_SEED: 1, EPOCHS: 1},
             ],
             "language": "en",
         }
@@ -484,9 +483,10 @@ async def test_process_gives_diagnostic_data(
     diagnostic_data = message.get(DIAGNOSTIC_DATA)
 
     # The last component is DIETClassifier, which should add attention weights
+    name = "2_DIETClassifier"
     assert isinstance(diagnostic_data, dict)
-    assert _name in diagnostic_data
-    assert "attention_weights" in diagnostic_data[_name]
-    assert isinstance(diagnostic_data[_name].get("attention_weights"), np.ndarray)
-    assert "text_transformed" in diagnostic_data[_name]
-    assert isinstance(diagnostic_data[_name].get("text_transformed"), np.ndarray)
+    assert name in diagnostic_data
+    assert "attention_weights" in diagnostic_data[name]
+    assert isinstance(diagnostic_data[name].get("attention_weights"), np.ndarray)
+    assert "text_transformed" in diagnostic_data[name]
+    assert isinstance(diagnostic_data[name].get("text_transformed"), np.ndarray)
diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index 5a7ffe5f6b97..f8be5ffaecd1 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -248,14 +248,13 @@ async def test_process_gives_diagnostic_data(
 ):
     """Test if processing a message returns attention weights as numpy array."""
 
-    _name = "ResponseSelector"
     _config = RasaNLUModelConfig(
         {
             "pipeline": [
                 {"name": "WhitespaceTokenizer"},
                 {"name": "CountVectorsFeaturizer"},
                 {
-                    "name": _name,
+                    "name": "ResponseSelector",
                     EPOCHS: 1,
                     EVAL_NUM_EXAMPLES: 10,
                     EVAL_NUM_EPOCHS: 1,
@@ -286,9 +285,10 @@ async def test_process_gives_diagnostic_data(
     diagnostic_data = message.get(DIAGNOSTIC_DATA)
 
     # The last component is DIETClassifier, which should add attention weights
+    name = f"2_ResponseSelector"
     assert isinstance(diagnostic_data, dict)
-    assert _name in diagnostic_data
-    assert "attention_weights" in diagnostic_data[_name]
-    assert isinstance(diagnostic_data[_name].get("attention_weights"), np.ndarray)
-    assert "text_transformed" in diagnostic_data[_name]
-    assert isinstance(diagnostic_data[_name].get("text_transformed"), np.ndarray)
+    assert name in diagnostic_data
+    assert "attention_weights" in diagnostic_data[name]
+    assert isinstance(diagnostic_data[name].get("attention_weights"), np.ndarray)
+    assert "text_transformed" in diagnostic_data[name]
+    assert isinstance(diagnostic_data[name].get("text_transformed"), np.ndarray)

From 86bd3eccde60a8316537b7dc4eabbc8eec7e7f89 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 26 Nov 2020 19:52:39 +0100
Subject: [PATCH 042/102] Fix `test_set_attr_on_component`

---
 tests/nlu/selectors/test_selectors.py | 2 +-
 tests/nlu/test_config.py              | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index f8be5ffaecd1..e4338b49f80a 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -285,7 +285,7 @@ async def test_process_gives_diagnostic_data(
     diagnostic_data = message.get(DIAGNOSTIC_DATA)
 
     # The last component is DIETClassifier, which should add attention weights
-    name = f"2_ResponseSelector"
+    name = "2_ResponseSelector"
     assert isinstance(diagnostic_data, dict)
     assert name in diagnostic_data
     assert "attention_weights" in diagnostic_data[name]
diff --git a/tests/nlu/test_config.py b/tests/nlu/test_config.py
index e11c9a473c6a..9e9e40251a2e 100644
--- a/tests/nlu/test_config.py
+++ b/tests/nlu/test_config.py
@@ -13,6 +13,7 @@
 import rasa.shared.nlu.training_data.loading
 from rasa.nlu import components
 from rasa.nlu.components import ComponentBuilder
+from rasa.nlu.constants import COMPONENT_INDEX
 from rasa.shared.nlu.constants import TRAINABLE_EXTRACTORS
 from rasa.nlu.model import Trainer
 from tests.nlu.utilities import write_file_config
@@ -98,10 +99,14 @@ def test_set_attr_on_component():
 
     _config.set_component_attr(idx_classifier, epochs=10)
 
-    assert _config.for_component(idx_tokenizer) == {"name": "SpacyTokenizer"}
+    assert _config.for_component(idx_tokenizer) == {
+        "name": "SpacyTokenizer",
+        COMPONENT_INDEX: idx_tokenizer,
+    }
     assert _config.for_component(idx_classifier) == {
         "name": "DIETClassifier",
         "epochs": 10,
+        COMPONENT_INDEX: idx_classifier,
     }
 
 

From 7e1e02d1550ffe62608cade3d34cfe9dbcfd7ba9 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 26 Nov 2020 19:57:48 +0100
Subject: [PATCH 043/102] Apply BLACK formatting

---
 rasa/nlu/featurizers/featurizer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/rasa/nlu/featurizers/featurizer.py b/rasa/nlu/featurizers/featurizer.py
index 8c0462f9a9db..0320fe6007f8 100644
--- a/rasa/nlu/featurizers/featurizer.py
+++ b/rasa/nlu/featurizers/featurizer.py
@@ -12,7 +12,9 @@ def __init__(self, component_config: Optional[Dict[Text, Any]] = None) -> None:
             component_config = {}
 
         # makes sure the alias name is set
-        self.component_config = component_config  # Necessary for `unique_name` to be defined
+        self.component_config = (
+            component_config  # Necessary for `unique_name` to be defined
+        )
         component_config.setdefault(FEATURIZER_CLASS_ALIAS, self.unique_name)
 
         super().__init__(component_config)

From 3ee79e2918d3ee9de80448d5cdf4a8d7c315c3f8 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 26 Nov 2020 20:12:48 +0100
Subject: [PATCH 044/102] Fix formatting

---
 rasa/nlu/config.py                       | 4 ++--
 rasa/shared/nlu/training_data/message.py | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/rasa/nlu/config.py b/rasa/nlu/config.py
index 94e1a991d5c0..4fd0be7f028e 100644
--- a/rasa/nlu/config.py
+++ b/rasa/nlu/config.py
@@ -77,8 +77,8 @@ def component_config_from_pipeline(
     pipeline: List[Dict[Text, Any]],
     defaults: Optional[Dict[Text, Any]] = None,
 ) -> Dict[Text, Any]:
-    """
-    Get the configuration of the `index`th component.
+    """Get the configuration of the `index`th component.
+
     Args:
         index: Index of the component.
         pipeline: List of component configurations.
diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index 362decb9a5cb..8e8ef7cf0ce4 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -68,9 +68,11 @@ def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
 
     def set(self, prop: Text, info: Any, add_to_output=False) -> None:
         """Set property `prop` to `info`.
+
         Args:
             prop: Name of the property to be set.
             info: Value to be assigned to that property.
+            add_to_output: Decides whether to add `prop` to the `output_properties`.
         """
         self.data[prop] = info
         if add_to_output:

From af7627e53bbe0c7d3d3d4933a09345e0a1792799 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Fri, 27 Nov 2020 10:09:13 +0100
Subject: [PATCH 045/102] Add newline

---
 rasa/nlu/config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rasa/nlu/config.py b/rasa/nlu/config.py
index 4fd0be7f028e..7c65749325e9 100644
--- a/rasa/nlu/config.py
+++ b/rasa/nlu/config.py
@@ -83,6 +83,7 @@ def component_config_from_pipeline(
         index: Index of the component.
         pipeline: List of component configurations.
         defaults: Default configuration.
+        
     Returns:
         The `index`th component configuration, expanded
         by the given defaults.

From e797e405c3c82b6df6080f9d4016d5bb3949f2ae Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Fri, 27 Nov 2020 11:13:21 +0100
Subject: [PATCH 046/102] Remove whitespace

---
 rasa/nlu/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/nlu/config.py b/rasa/nlu/config.py
index 7c65749325e9..021d527409df 100644
--- a/rasa/nlu/config.py
+++ b/rasa/nlu/config.py
@@ -83,7 +83,7 @@ def component_config_from_pipeline(
         index: Index of the component.
         pipeline: List of component configurations.
         defaults: Default configuration.
-        
+
     Returns:
         The `index`th component configuration, expanded
         by the given defaults.

From 5a849eb575748b89977c25a8596a5aca85a4ce24 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Fri, 27 Nov 2020 17:20:19 +0100
Subject: [PATCH 047/102] Update rasa/nlu/featurizers/featurizer.py

Co-authored-by: Vladimir Vlasov <vladimir@rasa.com>
---
 rasa/nlu/featurizers/featurizer.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/rasa/nlu/featurizers/featurizer.py b/rasa/nlu/featurizers/featurizer.py
index 0320fe6007f8..9c3bfa9a5790 100644
--- a/rasa/nlu/featurizers/featurizer.py
+++ b/rasa/nlu/featurizers/featurizer.py
@@ -12,9 +12,8 @@ def __init__(self, component_config: Optional[Dict[Text, Any]] = None) -> None:
             component_config = {}
 
         # makes sure the alias name is set
-        self.component_config = (
-            component_config  # Necessary for `unique_name` to be defined
-        )
+        # Necessary for `unique_name` to be defined
+        self.component_config = component_config
         component_config.setdefault(FEATURIZER_CLASS_ALIAS, self.unique_name)
 
         super().__init__(component_config)

From 4225b8464dfc6715783b260c1bcf4c35d9de243b Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Fri, 27 Nov 2020 17:20:33 +0100
Subject: [PATCH 048/102] Update rasa/core/policies/policy.py

Co-authored-by: Vladimir Vlasov <vladimir@rasa.com>
---
 rasa/core/policies/policy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py
index c1917ec1205a..9f38282878d5 100644
--- a/rasa/core/policies/policy.py
+++ b/rasa/core/policies/policy.py
@@ -399,7 +399,7 @@ def __init__(
         self.events = events or []
         self.optional_events = optional_events or []
         self.is_end_to_end_prediction = is_end_to_end_prediction
-        self.diagnostic_data = diagnostic_data or dict()
+        self.diagnostic_data = diagnostic_data or {}
 
     @staticmethod
     def for_action_name(

From c2c59d0452d63626c5021a4e5e2312e058b6ae41 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Tue, 1 Dec 2020 09:40:29 +0100
Subject: [PATCH 049/102] Update rasa/nlu/components.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/nlu/components.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 7b971c564a49..adb2ce9c1845 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -405,7 +405,7 @@ def name(self) -> Text:
     def unique_name(self) -> Text:
         """Get a unique name for the component in the pipeline."""
         index = self.component_config.get(COMPONENT_INDEX)
-        return self.name if index is None else str(index) + "_" + self.name
+        return self.name if index is None else f"{index}_{self.name}"
 
     # Which components are required by this component.
     # Listed components should appear before the component itself in the pipeline.

From 834ef6cfb455206cc7a8e5cb9c37591c84989252 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Tue, 1 Dec 2020 09:40:41 +0100
Subject: [PATCH 050/102] Update rasa/core/policies/policy.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/core/policies/policy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py
index 9f38282878d5..c05f1e2d0db7 100644
--- a/rasa/core/policies/policy.py
+++ b/rasa/core/policies/policy.py
@@ -390,7 +390,7 @@ def __init__(
             is_end_to_end_prediction: `True` if the prediction used the text of the
                 user message instead of the intent.
             diagnostic_data: Intermediate results or other information that is not
-                necessary for Rasa to function, but intended for debuging and
+                necessary for Rasa to function, but intended for debugging and
                 fine-tuning purposes.
         """
         self.probabilities = probabilities

From a7d3297bfa6ba9a3217466c791f82bebbe924b4f Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Tue, 1 Dec 2020 09:41:10 +0100
Subject: [PATCH 051/102] Update rasa/nlu/config.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/nlu/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/nlu/config.py b/rasa/nlu/config.py
index 021d527409df..9ea1b0f15426 100644
--- a/rasa/nlu/config.py
+++ b/rasa/nlu/config.py
@@ -77,7 +77,7 @@ def component_config_from_pipeline(
     pipeline: List[Dict[Text, Any]],
     defaults: Optional[Dict[Text, Any]] = None,
 ) -> Dict[Text, Any]:
-    """Get the configuration of the `index`th component.
+    """Gets the configuration of the `index`th component.
 
     Args:
         index: Index of the component.

From 0757363603a15987a78eb15f81e32c70cd17fe52 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Tue, 1 Dec 2020 09:41:39 +0100
Subject: [PATCH 052/102] Update rasa/nlu/config.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/nlu/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/nlu/config.py b/rasa/nlu/config.py
index 9ea1b0f15426..8cb76cd88c44 100644
--- a/rasa/nlu/config.py
+++ b/rasa/nlu/config.py
@@ -81,7 +81,7 @@ def component_config_from_pipeline(
 
     Args:
         index: Index of the component.
-        pipeline: List of component configurations.
+        pipeline: Configuration of the components.
         defaults: Default configuration.
 
     Returns:

From ac38f06b3fd5bc95b6736a9136be15faf3d3cf14 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Tue, 1 Dec 2020 09:41:53 +0100
Subject: [PATCH 053/102] Update rasa/shared/nlu/training_data/message.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/shared/nlu/training_data/message.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index 8e8ef7cf0ce4..e38f29a20aab 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -66,7 +66,7 @@ def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
         else:
             self.data[DIAGNOSTIC_DATA] = {origin: values_to_numpy(data)}
 
-    def set(self, prop: Text, info: Any, add_to_output=False) -> None:
+    def set(self, prop: Text, info: Any, add_to_output: bool = False) -> None:
         """Set property `prop` to `info`.
 
         Args:

From b5e110485956475a9c9187c3b4e6e49641efba38 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 09:54:35 +0100
Subject: [PATCH 054/102] Move `values_to_numpy` out of `shared`

---
 rasa/nlu/classifiers/diet_classifier.py           | 6 ++++--
 rasa/nlu/selectors/response_selector.py           | 6 ++++--
 rasa/shared/nlu/training_data/message.py          | 5 ++---
 rasa/{shared => }/utils/tensorflow/tf_to_numpy.py | 0
 4 files changed, 10 insertions(+), 7 deletions(-)
 rename rasa/{shared => }/utils/tensorflow/tf_to_numpy.py (100%)

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index babf21dfd9ae..04a0cceac52b 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -25,7 +25,7 @@
 from rasa.utils.tensorflow import layers
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
-from rasa.shared.utils.tensorflow.tf_to_numpy import values_to_numpy
+from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
 from rasa.nlu.constants import TOKENS_NAMES
 from rasa.shared.nlu.constants import (
     TEXT,
@@ -910,7 +910,9 @@ def process(self, message: Message, **kwargs: Any) -> None:
             message.set(ENTITIES, entities, add_to_output=True)
 
         if out and DIAGNOSTIC_DATA in out:
-            message.add_diagnostic_data(self.unique_name, out.get(DIAGNOSTIC_DATA))
+            message.add_diagnostic_data(
+                self.unique_name, values_to_numpy(out.get(DIAGNOSTIC_DATA))
+            )
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py
index a0c4aa7764d5..e5dc9d8cf2cc 100644
--- a/rasa/nlu/selectors/response_selector.py
+++ b/rasa/nlu/selectors/response_selector.py
@@ -7,7 +7,7 @@
 from typing import Any, Dict, Optional, Text, Tuple, Union, List, Type
 
 from rasa.shared.constants import DIAGNOSTIC_DATA
-from rasa.shared.utils.tensorflow.tf_to_numpy import values_to_numpy
+from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
 from rasa.shared.nlu.training_data import util
 import rasa.shared.utils.io
 from rasa.shared.exceptions import InvalidConfigException
@@ -436,7 +436,9 @@ def process(self, message: Message, **kwargs: Any) -> None:
         self._set_message_property(message, prediction_dict, selector_key)
 
         if out and DIAGNOSTIC_DATA in out:
-            message.add_diagnostic_data(self.unique_name, out.get(DIAGNOSTIC_DATA))
+            message.add_diagnostic_data(
+                self.unique_name, values_to_numpy(out.get(DIAGNOSTIC_DATA))
+            )
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
         """Persist this model into the passed directory.
diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index e38f29a20aab..4633043f173d 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -22,7 +22,6 @@
     ACTION_NAME,
 )
 from rasa.shared.constants import DIAGNOSTIC_DATA
-from rasa.shared.utils.tensorflow.tf_to_numpy import values_to_numpy
 
 if typing.TYPE_CHECKING:
     from rasa.shared.nlu.training_data.features import Features
@@ -62,9 +61,9 @@ def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
                 f"data will be overwritten."
             )
         if DIAGNOSTIC_DATA in self.data:
-            self.data[DIAGNOSTIC_DATA][origin] = values_to_numpy(data)
+            self.data[DIAGNOSTIC_DATA][origin] = data
         else:
-            self.data[DIAGNOSTIC_DATA] = {origin: values_to_numpy(data)}
+            self.data[DIAGNOSTIC_DATA] = {origin: data}
 
     def set(self, prop: Text, info: Any, add_to_output: bool = False) -> None:
         """Set property `prop` to `info`.
diff --git a/rasa/shared/utils/tensorflow/tf_to_numpy.py b/rasa/utils/tensorflow/tf_to_numpy.py
similarity index 100%
rename from rasa/shared/utils/tensorflow/tf_to_numpy.py
rename to rasa/utils/tensorflow/tf_to_numpy.py

From 74ac0d94c949dfb9b95118d2c15130eba497b169 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 09:57:18 +0100
Subject: [PATCH 055/102] Avoid comparing diagnostic data for prediction

---
 rasa/core/policies/policy.py | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py
index c05f1e2d0db7..fd3e9cbf2d42 100644
--- a/rasa/core/policies/policy.py
+++ b/rasa/core/policies/policy.py
@@ -435,21 +435,6 @@ def __eq__(self, other: Any) -> bool:
         if not isinstance(other, PolicyPrediction):
             return False
 
-        # The `diagnostic_data` values can be numpy arrays, so we have to check
-        # these separately
-        for key, value in self.diagnostic_data.items():
-            if key not in other.diagnostic_data:
-                return False
-            if isinstance(value, np.ndarray) and not np.array_equal(
-                value, other.diagnostic_data[key]
-            ):
-                return False
-            elif (
-                not isinstance(value, np.ndarray)
-                and value != other.diagnostic_data[key]
-            ):
-                return False
-
         return (
             self.probabilities == other.probabilities
             and self.policy_name == other.policy_name
@@ -457,6 +442,7 @@ def __eq__(self, other: Any) -> bool:
             and self.events == other.events
             and self.optional_events == other.events
             and self.is_end_to_end_prediction == other.is_end_to_end_prediction
+            # We do not compare `diagnostic_data`
         )
 
     @property

From a3d46e661c87f1edb3f420fc365c804ad3a7c0b1 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 11:13:55 +0100
Subject: [PATCH 056/102] Update doc strings and fix import

---
 rasa/core/policies/ted_policy.py |  2 +-
 rasa/nlu/components.py           | 31 ++++++++++++++++++-------------
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 1a0fc1f5e116..0179526c4720 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -30,7 +30,7 @@
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
 from rasa.utils.tensorflow.model_data_utils import convert_to_data_format
-from rasa.shared.utils.tensorflow.tf_to_numpy import values_to_numpy
+from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
 from rasa.utils.tensorflow.constants import (
     LABEL,
     TRANSFORMER_SIZE,
diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index adb2ce9c1845..046a136a5353 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -388,33 +388,38 @@ class Component(metaclass=ComponentMetaclass):
     the pipeline to do intent classification.
     """
 
-    # Component class name is used when integrating it in a
-    # pipeline. E.g. ``[ComponentA, ComponentB]``
-    # will be a proper pipeline definition where ``ComponentA``
-    # is the name of the first component of the pipeline.
     @property
     def name(self) -> Text:
-        """Access the class's property name from an instance."""
+        """Access the class's property name from an instance.
+        
+        Component class name is used when integrating it in a
+        pipeline. E.g. ``[ComponentA, ComponentB]``
+        will be a proper pipeline definition where ``ComponentA``
+        is the name of the first component of the pipeline.
+        """
 
         return type(self).name
 
-    # The unique name can be used to distinguish components in
-    # a pipeline, e.g. when the pipeline contains multiple
-    # featurizers of the same type.
     @property
     def unique_name(self) -> Text:
-        """Get a unique name for the component in the pipeline."""
+        """Get a unique name for the component in the pipeline.
+        
+        The unique name can be used to distinguish components in
+        a pipeline, e.g. when the pipeline contains multiple
+        featurizers of the same type.
+        """
         index = self.component_config.get(COMPONENT_INDEX)
-        return self.name if index is None else f"{index}_{self.name}"
+        return self.name if index is None else f"component_{index}_{self.name}"
 
-    # Which components are required by this component.
-    # Listed components should appear before the component itself in the pipeline.
     @classmethod
     def required_components(cls) -> List[Type["Component"]]:
         """Specify which components need to be present in the pipeline.
 
+        Which components are required by this component.
+        Listed components should appear before the component itself in the pipeline.
+
         Returns:
-            The list of class names of required components.
+            The class names of the required components.
         """
 
         return []

From 109378d67ee913beba96900b666402d843a2fc9d Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 11:34:38 +0100
Subject: [PATCH 057/102] Remove spaces

---
 rasa/nlu/components.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 046a136a5353..8e1a87c7aad3 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -391,7 +391,7 @@ class Component(metaclass=ComponentMetaclass):
     @property
     def name(self) -> Text:
         """Access the class's property name from an instance.
-        
+
         Component class name is used when integrating it in a
         pipeline. E.g. ``[ComponentA, ComponentB]``
         will be a proper pipeline definition where ``ComponentA``
@@ -403,7 +403,7 @@ def name(self) -> Text:
     @property
     def unique_name(self) -> Text:
         """Get a unique name for the component in the pipeline.
-        
+
         The unique name can be used to distinguish components in
         a pipeline, e.g. when the pipeline contains multiple
         featurizers of the same type.

From 01d342c78e6c03232c3fcb30e58c6f4dd16b855e Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 11:46:56 +0100
Subject: [PATCH 058/102] Update changelog

---
 changelog/5673.improvement.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index 56f981440b96..c7a502b9c192 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -1,4 +1,8 @@
-Return diagnostic data for action and NLU predictions.
+Expose diagnostic data for action and NLU predictions.
+
+Add `diagnostic_data` field to the [Message](https://rasa.com/docs/rasa/reference/rasa/shared/nlu/training_data/message#message-objects) 
+and [Prediction](https://rasa.com/docs/rasa/reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
+information about attention weights and other intermediate results of the inference computation.
 
 DIET and TED now both expose the attention weights of their transformer layers.
 This can be used for debugging and fine-tuning, e.g. with RasaLit.

From 1db2d94f9588d665aeebc3c342dec9edcc2f46d3 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 12:03:01 +0100
Subject: [PATCH 059/102] Add minimal examples

---
 changelog/5673.improvement.md | 82 ++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 2 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index c7a502b9c192..9fafcbffa3a4 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -3,6 +3,84 @@ Expose diagnostic data for action and NLU predictions.
 Add `diagnostic_data` field to the [Message](https://rasa.com/docs/rasa/reference/rasa/shared/nlu/training_data/message#message-objects) 
 and [Prediction](https://rasa.com/docs/rasa/reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
 information about attention weights and other intermediate results of the inference computation.
+This infromation can be used for debugging and fine-tuning, e.g. with RasaLit.
 
-DIET and TED now both expose the attention weights of their transformer layers.
-This can be used for debugging and fine-tuning, e.g. with RasaLit.
+You can access diagnostic data for DIET like this:
+```
+from rasa.cli.utils import get_validated_path
+from rasa.model import get_model, get_model_subdirectories
+from rasa.nlu.model import Interpreter
+from rasa.shared.nlu.training_data.message import Message
+from rasa.shared.nlu.constants import TEXT
+from rasa.shared.constants import DIAGNOSTIC_DATA
+import pathlib
+
+
+def load_interpreter(model_dir, model):
+    path_str = str(pathlib.Path(model_dir) / model)
+    model = get_validated_path(path_str, "model")
+    model_path = get_model(model)
+    _, nlu_model = get_model_subdirectories(model_path)
+    return Interpreter.load(nlu_model)
+
+
+if __name__ == "__main__":
+    interpreter = load_interpreter("/home/jem-mosig/rasa/rasa/examples/johannes/test_atenweights/init/models/", "20201125-181411.tar.gz")
+    data = interpreter.default_output_attributes()
+    data[TEXT] = "hello world"
+    message = Message(data=data)
+    for e in interpreter.pipeline:
+        e.process(message)
+    nlu_diagnostic_data = message.as_dict()[DIAGNOSTIC_DATA]
+
+    for component_name, diagnostic_data in nlu_diagnostic_data.items():
+        # Attention weights:
+        print(f"attention_weights for {component_name}:")
+        attention_weights = diagnostic_data["attention_weights"]
+        print(attention_weights)
+
+        print(f"\ntext_transformed for {component_name}:")
+        text_transformed = diagnostic_data["text_transformed"]
+        print(text_transformed)
+
+```
+
+You can access diagnostic data for TED like this:
+```
+from rasa.core.policies.ted_policy import TEDPolicy
+from rasa.shared.core.constants import ACTION_LISTEN_NAME
+from rasa.shared.core.domain import Domain
+from rasa.shared.core.events import ActionExecuted, UserUttered
+from rasa.shared.core.trackers import DialogueStateTracker
+from rasa.shared.nlu.interpreter import RegexInterpreter
+
+UTTER_GREET_ACTION = "utter_greet"
+GREET_INTENT_NAME = "greet"
+DOMAIN_YAML = f"""
+intents:
+- {GREET_INTENT_NAME}
+actions:
+- {UTTER_GREET_ACTION}
+"""
+
+
+if __name__ == "__main__":
+    domain = Domain.from_yaml(DOMAIN_YAML)
+    policy = TEDPolicy()
+    GREET_RULE = DialogueStateTracker.from_events(
+        "greet rule",
+        evts=[
+            UserUttered(intent={"name": GREET_INTENT_NAME}),
+            ActionExecuted(UTTER_GREET_ACTION),
+            ActionExecuted(ACTION_LISTEN_NAME),
+            UserUttered(intent={"name": GREET_INTENT_NAME}),
+            ActionExecuted(ACTION_LISTEN_NAME),
+        ],
+    )
+    policy.train([GREET_RULE], domain, RegexInterpreter())
+    prediction = policy.predict_action_probabilities(
+        GREET_RULE, domain, RegexInterpreter()
+    )
+
+    print(f"{prediction.diagnostic_data.get('attention_weights')}")
+```

From cf88309cb19bd26a4fd6e95bf17ac0027b7a5346 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 12:06:18 +0100
Subject: [PATCH 060/102] Remove path

---
 changelog/5673.improvement.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index 9fafcbffa3a4..867ecf5814b1 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -5,7 +5,7 @@ and [Prediction](https://rasa.com/docs/rasa/reference/rasa/core/policies/policy#
 information about attention weights and other intermediate results of the inference computation.
 This infromation can be used for debugging and fine-tuning, e.g. with RasaLit.
 
-You can access diagnostic data for DIET like this:
+You can access diagnostic data for DIET like this (please substitute `<your-rasa-model-directory>` and `<your-rasa-model-name>`):
 ```
 from rasa.cli.utils import get_validated_path
 from rasa.model import get_model, get_model_subdirectories
@@ -25,7 +25,7 @@ def load_interpreter(model_dir, model):
 
 
 if __name__ == "__main__":
-    interpreter = load_interpreter("/home/jem-mosig/rasa/rasa/examples/johannes/test_atenweights/init/models/", "20201125-181411.tar.gz")
+    interpreter = load_interpreter("<your-rasa-model-directory>", "<your-rasa-model-name>.tar.gz")
     data = interpreter.default_output_attributes()
     data[TEXT] = "hello world"
     message = Message(data=data)

From b9bc4674b8138ecc36e3282e0a073083e8886cfe Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 12:07:16 +0100
Subject: [PATCH 061/102] Remove comment

---
 changelog/5673.improvement.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index 867ecf5814b1..bd0464bc9d07 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -34,7 +34,6 @@ if __name__ == "__main__":
     nlu_diagnostic_data = message.as_dict()[DIAGNOSTIC_DATA]
 
     for component_name, diagnostic_data in nlu_diagnostic_data.items():
-        # Attention weights:
         print(f"attention_weights for {component_name}:")
         attention_weights = diagnostic_data["attention_weights"]
         print(attention_weights)

From b8bb3dbfdd99b594bf49c7fdb4c4f7dc92b05a60 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 12:08:43 +0100
Subject: [PATCH 062/102] Add blank line

---
 changelog/5673.improvement.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index bd0464bc9d07..ecca9b57e17c 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -53,6 +53,7 @@ from rasa.shared.core.events import ActionExecuted, UserUttered
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.nlu.interpreter import RegexInterpreter
 
+
 UTTER_GREET_ACTION = "utter_greet"
 GREET_INTENT_NAME = "greet"
 DOMAIN_YAML = f"""

From 6ae03fa0a1bc644fad4618e461ec17176c692e92 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 12:24:47 +0100
Subject: [PATCH 063/102] Lint

---
 changelog/5673.improvement.md | 2 ++
 rasa/nlu/components.py        | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index ecca9b57e17c..158ab2fd31c7 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -6,6 +6,7 @@ information about attention weights and other intermediate results of the infere
 This infromation can be used for debugging and fine-tuning, e.g. with RasaLit.
 
 You can access diagnostic data for DIET like this (please substitute `<your-rasa-model-directory>` and `<your-rasa-model-name>`):
+
 ```
 from rasa.cli.utils import get_validated_path
 from rasa.model import get_model, get_model_subdirectories
@@ -45,6 +46,7 @@ if __name__ == "__main__":
 ```
 
 You can access diagnostic data for TED like this:
+
 ```
 from rasa.core.policies.ted_policy import TEDPolicy
 from rasa.shared.core.constants import ACTION_LISTEN_NAME
diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 8e1a87c7aad3..81f070ea31c2 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -397,7 +397,6 @@ def name(self) -> Text:
         will be a proper pipeline definition where ``ComponentA``
         is the name of the first component of the pipeline.
         """
-
         return type(self).name
 
     @property
@@ -421,7 +420,6 @@ def required_components(cls) -> List[Type["Component"]]:
         Returns:
             The class names of the required components.
         """
-
         return []
 
     # Defines the default configuration parameters of a component

From 8c5e60770d286727de1898360f805918bf95af01 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 12:44:25 +0100
Subject: [PATCH 064/102] Add `python` declaration

---
 changelog/5673.improvement.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index 158ab2fd31c7..b89e74113a39 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -7,7 +7,7 @@ This infromation can be used for debugging and fine-tuning, e.g. with RasaLit.
 
 You can access diagnostic data for DIET like this (please substitute `<your-rasa-model-directory>` and `<your-rasa-model-name>`):
 
-```
+```python
 from rasa.cli.utils import get_validated_path
 from rasa.model import get_model, get_model_subdirectories
 from rasa.nlu.model import Interpreter
@@ -47,7 +47,7 @@ if __name__ == "__main__":
 
 You can access diagnostic data for TED like this:
 
-```
+```python
 from rasa.core.policies.ted_policy import TEDPolicy
 from rasa.shared.core.constants import ACTION_LISTEN_NAME
 from rasa.shared.core.domain import Domain

From 611c4645cd0ede91fdb7232f00b7818ccce79330 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 12:57:34 +0100
Subject: [PATCH 065/102] Avoid angle brackets

---
 changelog/5673.improvement.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index b89e74113a39..56f7a70e0e64 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -5,7 +5,7 @@ and [Prediction](https://rasa.com/docs/rasa/reference/rasa/core/policies/policy#
 information about attention weights and other intermediate results of the inference computation.
 This infromation can be used for debugging and fine-tuning, e.g. with RasaLit.
 
-You can access diagnostic data for DIET like this (please substitute `<your-rasa-model-directory>` and `<your-rasa-model-name>`):
+You can access diagnostic data for DIET like this (please define the `YOUR_RASA_MODEL_DIRECTORY` and `YOUR_RASA_MODEL_NAME` constants):
 
 ```python
 from rasa.cli.utils import get_validated_path
@@ -26,7 +26,7 @@ def load_interpreter(model_dir, model):
 
 
 if __name__ == "__main__":
-    interpreter = load_interpreter("<your-rasa-model-directory>", "<your-rasa-model-name>.tar.gz")
+    interpreter = load_interpreter(YOUR_RASA_MODEL_DIRECTORY, f"{YOUR_RASA_MODEL_NAME}.tar.gz")
     data = interpreter.default_output_attributes()
     data[TEXT] = "hello world"
     message = Message(data=data)

From 77ad137a3c1b2026e9462766060824f14ac8c5cc Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 1 Dec 2020 13:31:48 +0100
Subject: [PATCH 066/102] Fix tests

---
 tests/nlu/classifiers/test_diet_classifier.py | 2 +-
 tests/nlu/selectors/test_selectors.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index ebcc01b81984..1ef442bd802c 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -483,7 +483,7 @@ async def test_process_gives_diagnostic_data(
     diagnostic_data = message.get(DIAGNOSTIC_DATA)
 
     # The last component is DIETClassifier, which should add attention weights
-    name = "2_DIETClassifier"
+    name = "component_2_DIETClassifier"
     assert isinstance(diagnostic_data, dict)
     assert name in diagnostic_data
     assert "attention_weights" in diagnostic_data[name]
diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index e4338b49f80a..e4f83242af47 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -285,7 +285,7 @@ async def test_process_gives_diagnostic_data(
     diagnostic_data = message.get(DIAGNOSTIC_DATA)
 
     # The last component is DIETClassifier, which should add attention weights
-    name = "2_ResponseSelector"
+    name = "component_2_ResponseSelector"
     assert isinstance(diagnostic_data, dict)
     assert name in diagnostic_data
     assert "attention_weights" in diagnostic_data[name]

From 7f43fb9c26035f2b6c6f92fcc6b9282ca2c16177 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Thu, 10 Dec 2020 10:33:59 +0100
Subject: [PATCH 067/102] Update rasa/nlu/classifiers/diet_classifier.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/nlu/classifiers/diet_classifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 04a0cceac52b..45383114c84a 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -895,7 +895,7 @@ def _entity_label_to_tags(
         return predicted_tags, confidence_values
 
     def process(self, message: Message, **kwargs: Any) -> None:
-        """Augment the message with intents, entities, and diagnostic data."""
+        """Augments the message with intents, entities, and diagnostic data."""
         out = self._predict(message)
 
         if self.component_config[INTENT_CLASSIFICATION]:

From bfc67a85e50b631cb2acc6dc1231621c046b08e2 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Thu, 10 Dec 2020 10:34:19 +0100
Subject: [PATCH 068/102] Update rasa/nlu/components.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/nlu/components.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 81f070ea31c2..0621a33eab68 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -393,8 +393,8 @@ def name(self) -> Text:
         """Access the class's property name from an instance.
 
         Component class name is used when integrating it in a
-        pipeline. E.g. ``[ComponentA, ComponentB]``
-        will be a proper pipeline definition where ``ComponentA``
+        pipeline. E.g. `[ComponentA, ComponentB]`
+        will be a proper pipeline definition where `ComponentA`
         is the name of the first component of the pipeline.
         """
         return type(self).name

From 0b7aaa04ebd9f427c5423b10d23fb53c7f0951f5 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Thu, 10 Dec 2020 10:35:02 +0100
Subject: [PATCH 069/102] Update rasa/nlu/components.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/nlu/components.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 0621a33eab68..da5e396ab6b6 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -390,7 +390,7 @@ class Component(metaclass=ComponentMetaclass):
 
     @property
     def name(self) -> Text:
-        """Access the class's property name from an instance.
+        """Name of the component to be used in the model configuration.
 
         Component class name is used when integrating it in a
         pipeline. E.g. `[ComponentA, ComponentB]`

From c92998b845a3b66f2f7c619ffc1f05253d07dfc3 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Thu, 10 Dec 2020 10:35:23 +0100
Subject: [PATCH 070/102] Update rasa/nlu/components.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/nlu/components.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index da5e396ab6b6..4578005bf9bd 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -401,7 +401,7 @@ def name(self) -> Text:
 
     @property
     def unique_name(self) -> Text:
-        """Get a unique name for the component in the pipeline.
+        """Gets a unique name for the component in the pipeline.
 
         The unique name can be used to distinguish components in
         a pipeline, e.g. when the pipeline contains multiple

From 411661631275dd30606a9fae015e4fb1cd2f0536 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Thu, 10 Dec 2020 10:35:38 +0100
Subject: [PATCH 071/102] Update rasa/nlu/components.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/nlu/components.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 4578005bf9bd..23363f194699 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -626,7 +626,7 @@ def cache_key(
         return None
 
     def __getstate__(self) -> Any:
-        """Get a copy of picklable parts of the component."""
+        """Gets a copy of picklable parts of the component."""
         d = self.__dict__.copy()
         # these properties should not be pickled
         if "partial_processing_context" in d:

From 34683318573037d3bb35f1664b3d1e21beb7821e Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Thu, 10 Dec 2020 10:35:57 +0100
Subject: [PATCH 072/102] Update rasa/shared/nlu/training_data/message.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/shared/nlu/training_data/message.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index 4633043f173d..83741e0a68c8 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -66,7 +66,7 @@ def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
             self.data[DIAGNOSTIC_DATA] = {origin: data}
 
     def set(self, prop: Text, info: Any, add_to_output: bool = False) -> None:
-        """Set property `prop` to `info`.
+        """Sets the message's property to the given value.
 
         Args:
             prop: Name of the property to be set.

From 87f24188cac096caff5723eb883a45bccedd2e1c Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Thu, 10 Dec 2020 10:36:11 +0100
Subject: [PATCH 073/102] Update rasa/utils/tensorflow/tf_to_numpy.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 rasa/utils/tensorflow/tf_to_numpy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/utils/tensorflow/tf_to_numpy.py b/rasa/utils/tensorflow/tf_to_numpy.py
index 19dd45de3c5b..762faa59ab74 100644
--- a/rasa/utils/tensorflow/tf_to_numpy.py
+++ b/rasa/utils/tensorflow/tf_to_numpy.py
@@ -3,7 +3,7 @@
 
 
 def values_to_numpy(data: Optional[Dict[Any, Any]]) -> Optional[Dict[Any, Any]]:
-    """Replace all tensorflow-tensor values with their numpy versions.
+    """Replaces all tensorflow-tensor values with their numpy versions.
 
     Args:
         data: Any dictionary for which values should be converted.

From 10035b75e631a540798f3b8dbbd560dd830c9c2f Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Thu, 10 Dec 2020 10:38:15 +0100
Subject: [PATCH 074/102] Update tests/nlu/classifiers/test_diet_classifier.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 tests/nlu/classifiers/test_diet_classifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 1ef442bd802c..08bafd08ba05 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -449,7 +449,7 @@ async def test_train_persist_load_with_composite_entities(
 
 
 async def test_process_gives_diagnostic_data(
-    component_builder: ComponentBuilder, tmpdir: Path
+    component_builder: ComponentBuilder, tmp_path: Path
 ):
     """Test if processing a message returns attention weights as numpy array."""
 

From 1246d8b40760e1cdf9d15786ac023152cfbef123 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Thu, 10 Dec 2020 10:38:42 +0100
Subject: [PATCH 075/102] Update tests/nlu/selectors/test_selectors.py

Co-authored-by: Tobias Wochinger <t.wochinger@rasa.com>
---
 tests/nlu/selectors/test_selectors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index e4f83242af47..f389d6fcb50b 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -244,7 +244,7 @@ async def test_train_model_checkpointing(
 
 
 async def test_process_gives_diagnostic_data(
-    component_builder: ComponentBuilder, tmpdir: Path
+    component_builder: ComponentBuilder, tmp_path: Path
 ):
     """Test if processing a message returns attention weights as numpy array."""
 

From 9ab4a631d08f76c2e915440a43a866f5446cbbe7 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 10 Dec 2020 11:26:26 +0100
Subject: [PATCH 076/102] Make minor changes

---
 rasa/core/policies/policy.py                       |  3 ++-
 rasa/core/policies/ted_policy.py                   |  6 ++++--
 rasa/nlu/classifiers/diet_classifier.py            |  5 +++--
 rasa/nlu/selectors/response_selector.py            |  5 +++--
 rasa/shared/nlu/training_data/message.py           | 13 ++++++++-----
 rasa/utils/tensorflow/{tf_to_numpy.py => numpy.py} |  0
 tests/nlu/classifiers/test_diet_classifier.py      |  4 ++--
 tests/nlu/selectors/test_selectors.py              |  2 +-
 8 files changed, 23 insertions(+), 15 deletions(-)
 rename rasa/utils/tensorflow/{tf_to_numpy.py => numpy.py} (100%)

diff --git a/rasa/core/policies/policy.py b/rasa/core/policies/policy.py
index fd3e9cbf2d42..cad66de02db3 100644
--- a/rasa/core/policies/policy.py
+++ b/rasa/core/policies/policy.py
@@ -442,7 +442,8 @@ def __eq__(self, other: Any) -> bool:
             and self.events == other.events
             and self.optional_events == other.events
             and self.is_end_to_end_prediction == other.is_end_to_end_prediction
-            # We do not compare `diagnostic_data`
+            # We do not compare `diagnostic_data`, because it has no effect on the
+            # action prediction.
         )
 
     @property
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 0179526c4720..ab8df31abea7 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -30,7 +30,7 @@
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
 from rasa.utils.tensorflow.model_data_utils import convert_to_data_format
-from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
+import rasa.utils.tensorflow.numpy
 from rasa.utils.tensorflow.constants import (
     LABEL,
     TRANSFORMER_SIZE,
@@ -399,7 +399,9 @@ def predict_action_probabilities(
 
         return self._prediction(
             confidence.tolist(),
-            diagnostic_data=values_to_numpy(output.get(DIAGNOSTIC_DATA)),
+            diagnostic_data=rasa.utils.tensorflow.numpy.values_to_numpy(
+                output.get(DIAGNOSTIC_DATA)
+            ),
         )
 
     def persist(self, path: Union[Text, Path]) -> None:
diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 45383114c84a..2d7d7c25a85a 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -25,7 +25,7 @@
 from rasa.utils.tensorflow import layers
 from rasa.utils.tensorflow.models import RasaModel, TransformerRasaModel
 from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature
-from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
+import rasa.utils.tensorflow.numpy
 from rasa.nlu.constants import TOKENS_NAMES
 from rasa.shared.nlu.constants import (
     TEXT,
@@ -911,7 +911,8 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
         if out and DIAGNOSTIC_DATA in out:
             message.add_diagnostic_data(
-                self.unique_name, values_to_numpy(out.get(DIAGNOSTIC_DATA))
+                self.unique_name,
+                rasa.utils.tensorflow.numpy.values_to_numpy(out.get(DIAGNOSTIC_DATA)),
             )
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py
index e5dc9d8cf2cc..1c0776829c73 100644
--- a/rasa/nlu/selectors/response_selector.py
+++ b/rasa/nlu/selectors/response_selector.py
@@ -7,7 +7,7 @@
 from typing import Any, Dict, Optional, Text, Tuple, Union, List, Type
 
 from rasa.shared.constants import DIAGNOSTIC_DATA
-from rasa.utils.tensorflow.tf_to_numpy import values_to_numpy
+import rasa.utils.tensorflow.numpy
 from rasa.shared.nlu.training_data import util
 import rasa.shared.utils.io
 from rasa.shared.exceptions import InvalidConfigException
@@ -437,7 +437,8 @@ def process(self, message: Message, **kwargs: Any) -> None:
 
         if out and DIAGNOSTIC_DATA in out:
             message.add_diagnostic_data(
-                self.unique_name, values_to_numpy(out.get(DIAGNOSTIC_DATA))
+                self.unique_name,
+                rasa.utils.tensorflow.numpy.values_to_numpy(out.get(DIAGNOSTIC_DATA)),
             )
 
     def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index 83741e0a68c8..49118193d758 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -53,17 +53,20 @@ def add_features(self, features: Optional["Features"]) -> None:
             self.features.append(features)
 
     def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
-        """Add diagnostic data from the component with name `origin`."""
+        """Adds diagnostic data from the `origin` component.
+        
+        Args:
+            origin: Name of the component that created the data.
+            data: The diagnostic data.
+        """
         if origin in self.get(DIAGNOSTIC_DATA, {}):
             rasa.shared.utils.io.raise_warning(
                 f"Please make sure every pipeline component has a distinct name. "
                 f"The name '{self.name}' appears at least twice and diagnostic "
                 f"data will be overwritten."
             )
-        if DIAGNOSTIC_DATA in self.data:
-            self.data[DIAGNOSTIC_DATA][origin] = data
-        else:
-            self.data[DIAGNOSTIC_DATA] = {origin: data}
+        self.data.setdefault(DIAGNOSTIC_DATA, {})
+        self.data[DIAGNOSTIC_DATA][origin] = data
 
     def set(self, prop: Text, info: Any, add_to_output: bool = False) -> None:
         """Sets the message's property to the given value.
diff --git a/rasa/utils/tensorflow/tf_to_numpy.py b/rasa/utils/tensorflow/numpy.py
similarity index 100%
rename from rasa/utils/tensorflow/tf_to_numpy.py
rename to rasa/utils/tensorflow/numpy.py
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 08bafd08ba05..73a6de89013e 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -273,7 +273,7 @@ async def test_margin_loss_is_not_normalized(
     _config = RasaNLUModelConfig({"pipeline": pipeline})
     (trained_model, _, persisted_path) = await train(
         _config,
-        path=tmpdir.strpath,
+        path=str(tmpdir),
         data="data/test/many_intents.md",
         component_builder=component_builder,
     )
@@ -466,7 +466,7 @@ async def test_process_gives_diagnostic_data(
 
     (trainer, trained, persisted_path) = await train(
         _config,
-        path=tmpdir.strpath,
+        path=str(tmp_path),
         data="data/test/many_intents.md",
         component_builder=component_builder,
     )
diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index f389d6fcb50b..cf1c71823495 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -268,7 +268,7 @@ async def test_process_gives_diagnostic_data(
 
     (trainer, trained, persisted_path) = await train(
         _config,
-        path=tmpdir.strpath,
+        path=str(tmp_path),
         data="data/test_selectors",
         component_builder=component_builder,
     )

From 3b9b59624f2f1e3214bfc9cd37da840fdfadc0eb Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Thu, 10 Dec 2020 12:34:57 +0100
Subject: [PATCH 077/102] Move TEDPolicy tests to separate module

---
 tests/core/policies/test_ted_policy.py | 346 ++++++++++++++++++++++++-
 tests/core/test_policies.py            | 330 +----------------------
 2 files changed, 345 insertions(+), 331 deletions(-)

diff --git a/tests/core/policies/test_ted_policy.py b/tests/core/policies/test_ted_policy.py
index be5aaa369f79..eb82ef9c7690 100644
--- a/tests/core/policies/test_ted_policy.py
+++ b/tests/core/policies/test_ted_policy.py
@@ -1,14 +1,39 @@
 from pathlib import Path
-from typing import List, Text
+from typing import Optional
+from unittest.mock import Mock
 
 import numpy as np
 import pytest
+import tests.core.test_policies
+from _pytest.monkeypatch import MonkeyPatch
+from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
+from rasa.core.featurizers.tracker_featurizers import (
+    MaxHistoryTrackerFeaturizer,
+    TrackerFeaturizer,
+)
+from rasa.core.policies.policy import Policy
 from rasa.core.policies.ted_policy import TEDPolicy
 from rasa.shared.core.constants import ACTION_LISTEN_NAME
 from rasa.shared.core.domain import Domain
-from rasa.shared.core.events import ActionExecuted, UserUttered
+from rasa.shared.core.events import (
+    ActionExecuted,
+    UserUttered,
+)
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.nlu.interpreter import RegexInterpreter
+from rasa.train import train_core
+from rasa.utils import train_utils
+from rasa.utils.tensorflow.constants import (
+    EVAL_NUM_EXAMPLES,
+    KEY_RELATIVE_ATTENTION,
+    LOSS_TYPE,
+    MAX_RELATIVE_POSITION,
+    RANKING_LENGTH,
+    SCALE_LOSS,
+    SIMILARITY_TYPE,
+    VALUE_RELATIVE_ATTENTION,
+)
+from tests.core.test_policies import PolicyTestCollection
 
 UTTER_GREET_ACTION = "utter_greet"
 GREET_INTENT_NAME = "greet"
@@ -41,3 +66,320 @@ def test_diagnostics():
     assert prediction.diagnostic_data
     assert "attention_weights" in prediction.diagnostic_data
     assert isinstance(prediction.diagnostic_data.get("attention_weights"), np.ndarray)
+
+
+class TestTEDPolicy(PolicyTestCollection):
+    def test_train_model_checkpointing(self, tmp_path: Path):
+        model_name = "core-checkpointed-model"
+        best_model_file = tmp_path / (model_name + ".tar.gz")
+        assert not best_model_file.exists()
+
+        train_core(
+            domain="data/test_domains/default.yml",
+            stories="data/test_stories/stories_defaultdomain.md",
+            output=str(tmp_path),
+            fixed_model_name=model_name,
+            config="data/test_config/config_ted_policy_model_checkpointing.yml",
+        )
+
+        assert best_model_file.exists()
+
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        return TEDPolicy(featurizer=featurizer, priority=priority)
+
+    def test_similarity_type(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[SIMILARITY_TYPE] == "inner"
+
+    def test_ranking_length(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[RANKING_LENGTH] == 10
+
+    def test_normalization(
+        self,
+        trained_policy: TEDPolicy,
+        tracker: DialogueStateTracker,
+        default_domain: Domain,
+        monkeypatch: MonkeyPatch,
+    ):
+        # first check the output is what we expect
+        predicted_probabilities = trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        ).probabilities
+        # count number of non-zero confidences
+        assert (
+            sum([confidence > 0 for confidence in predicted_probabilities])
+            == trained_policy.config[RANKING_LENGTH]
+        )
+        # check that the norm is still 1
+        assert sum(predicted_probabilities) == pytest.approx(1)
+
+        # also check our function is called
+        mock = Mock()
+        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
+        trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        )
+
+        mock.normalize.assert_called_once()
+
+    async def test_gen_batch(self, trained_policy: TEDPolicy, default_domain: Domain):
+        training_trackers = await tests.core.test_policies.train_trackers(
+            default_domain, augmentation_factor=0
+        )
+        interpreter = RegexInterpreter()
+        training_data, label_ids = trained_policy.featurize_for_training(
+            training_trackers, default_domain, interpreter
+        )
+        label_data, all_labels = trained_policy._create_label_data(
+            default_domain, interpreter
+        )
+        model_data = trained_policy._create_model_data(
+            training_data, label_ids, all_labels
+        )
+        batch_size = 2
+        (
+            batch_label_ids,
+            batch_entities_mask,
+            batch_entities_sentence_1,
+            batch_entities_sentence_2,
+            batch_entities_sentence_3,
+            batch_intent_mask,
+            batch_intent_sentence_1,
+            batch_intent_sentence_2,
+            batch_intent_sentence_3,
+            batch_slots_mask,
+            batch_slots_sentence_1,
+            batch_slots_sentence_2,
+            batch_slots_sentence_3,
+            batch_action_name_mask,
+            batch_action_name_sentence_1,
+            batch_action_name_sentence_2,
+            batch_action_name_sentence_3,
+            batch_dialogue_length,
+        ) = next(model_data._gen_batch(batch_size=batch_size))
+
+        assert (
+            batch_intent_mask.shape[0] == batch_size
+            and batch_action_name_mask.shape[0] == batch_size
+            and batch_entities_mask.shape[0] == batch_size
+            and batch_slots_mask.shape[0] == batch_size
+        )
+        assert (
+            batch_intent_sentence_3[1]
+            == batch_action_name_sentence_3[1]
+            == batch_entities_sentence_3[1]
+            == batch_slots_sentence_3[1]
+        )
+
+        (
+            batch_label_ids,
+            batch_entities_mask,
+            batch_entities_sentence_1,
+            batch_entities_sentence_2,
+            batch_entities_sentence_3,
+            batch_intent_mask,
+            batch_intent_sentence_1,
+            batch_intent_sentence_2,
+            batch_intent_sentence_3,
+            batch_slots_mask,
+            batch_slots_sentence_1,
+            batch_slots_sentence_2,
+            batch_slots_sentence_3,
+            batch_action_name_mask,
+            batch_action_name_sentence_1,
+            batch_action_name_sentence_2,
+            batch_action_name_sentence_3,
+            batch_dialogue_length,
+        ) = next(
+            model_data._gen_batch(
+                batch_size=batch_size, batch_strategy="balanced", shuffle=True
+            )
+        )
+
+        assert (
+            batch_intent_mask.shape[0] == batch_size
+            and batch_action_name_mask.shape[0] == batch_size
+            and batch_entities_mask.shape[0] == batch_size
+            and batch_slots_mask.shape[0] == batch_size
+        )
+        assert (
+            batch_intent_sentence_3[1]
+            == batch_action_name_sentence_3[1]
+            == batch_entities_sentence_3[1]
+            == batch_slots_sentence_3[1]
+        )
+
+
+class TestTEDPolicyMargin(TestTEDPolicy):
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        return TEDPolicy(
+            featurizer=featurizer, priority=priority, **{LOSS_TYPE: "margin"}
+        )
+
+    def test_similarity_type(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[SIMILARITY_TYPE] == "cosine"
+
+    def test_normalization(
+        self,
+        trained_policy: Policy,
+        tracker: DialogueStateTracker,
+        default_domain: Domain,
+        monkeypatch: MonkeyPatch,
+    ):
+        # Mock actual normalization method
+        mock = Mock()
+        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
+        trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        )
+
+        # function should not get called for margin loss_type
+        mock.normalize.assert_not_called()
+
+
+class TestTEDPolicyWithEval(TestTEDPolicy):
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        return TEDPolicy(
+            featurizer=featurizer,
+            priority=priority,
+            **{SCALE_LOSS: False, EVAL_NUM_EXAMPLES: 4},
+        )
+
+
+class TestTEDPolicyNoNormalization(TestTEDPolicy):
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        return TEDPolicy(
+            featurizer=featurizer, priority=priority, **{RANKING_LENGTH: 0}
+        )
+
+    def test_ranking_length(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[RANKING_LENGTH] == 0
+
+    def test_normalization(
+        self,
+        trained_policy: Policy,
+        tracker: DialogueStateTracker,
+        default_domain: Domain,
+        monkeypatch: MonkeyPatch,
+    ):
+        # first check the output is what we expect
+        predicted_probabilities = trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        ).probabilities
+        # there should be no normalization
+        assert all([confidence > 0 for confidence in predicted_probabilities])
+
+        # also check our function is not called
+        mock = Mock()
+        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
+        trained_policy.predict_action_probabilities(
+            tracker, default_domain, RegexInterpreter()
+        )
+
+        mock.normalize.assert_not_called()
+
+
+class TestTEDPolicyLowRankingLength(TestTEDPolicy):
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        return TEDPolicy(
+            featurizer=featurizer, priority=priority, **{RANKING_LENGTH: 3}
+        )
+
+    def test_ranking_length(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[RANKING_LENGTH] == 3
+
+
+class TestTEDPolicyHighRankingLength(TestTEDPolicy):
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        return TEDPolicy(
+            featurizer=featurizer, priority=priority, **{RANKING_LENGTH: 11}
+        )
+
+    def test_ranking_length(self, trained_policy: TEDPolicy):
+        assert trained_policy.config[RANKING_LENGTH] == 11
+
+
+class TestTEDPolicyWithStandardFeaturizer(TestTEDPolicy):
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        # use standard featurizer from TEDPolicy,
+        # since it is using MaxHistoryTrackerFeaturizer
+        # if max_history is not specified
+        return TEDPolicy(priority=priority)
+
+    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
+        assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer)
+        assert isinstance(
+            trained_policy.featurizer.state_featurizer, SingleStateFeaturizer
+        )
+        trained_policy.persist(str(tmp_path))
+        loaded = trained_policy.__class__.load(str(tmp_path))
+        assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer)
+        assert isinstance(loaded.featurizer.state_featurizer, SingleStateFeaturizer)
+
+
+class TestTEDPolicyWithMaxHistory(TestTEDPolicy):
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        # use standard featurizer from TEDPolicy,
+        # since it is using MaxHistoryTrackerFeaturizer
+        # if max_history is specified
+        return TEDPolicy(priority=priority, max_history=self.max_history)
+
+    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
+        assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer)
+        assert trained_policy.featurizer.max_history == self.max_history
+        assert isinstance(
+            trained_policy.featurizer.state_featurizer, SingleStateFeaturizer
+        )
+        trained_policy.persist(str(tmp_path))
+        loaded = trained_policy.__class__.load(str(tmp_path))
+        assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer)
+        assert loaded.featurizer.max_history == self.max_history
+        assert isinstance(loaded.featurizer.state_featurizer, SingleStateFeaturizer)
+
+
+class TestTEDPolicyWithRelativeAttention(TestTEDPolicy):
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        return TEDPolicy(
+            featurizer=featurizer,
+            priority=priority,
+            **{
+                KEY_RELATIVE_ATTENTION: True,
+                VALUE_RELATIVE_ATTENTION: True,
+                MAX_RELATIVE_POSITION: 5,
+            },
+        )
+
+
+class TestTEDPolicyWithRelativeAttentionMaxHistoryOne(TestTEDPolicy):
+
+    max_history = 1
+
+    def create_policy(
+        self, featurizer: Optional[TrackerFeaturizer], priority: int
+    ) -> Policy:
+        return TEDPolicy(
+            featurizer=featurizer,
+            priority=priority,
+            **{
+                KEY_RELATIVE_ATTENTION: True,
+                VALUE_RELATIVE_ATTENTION: True,
+                MAX_RELATIVE_POSITION: 5,
+            },
+        )
diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py
index b1a65f7e5d0f..6e186c875439 100644
--- a/tests/core/test_policies.py
+++ b/tests/core/test_policies.py
@@ -1,10 +1,9 @@
 from pathlib import Path
 from typing import Type, List, Text, Tuple, Optional, Any
-from unittest.mock import Mock, patch
+from unittest.mock import patch
 
 import numpy as np
 import pytest
-from _pytest.monkeypatch import MonkeyPatch
 
 from rasa.core.channels import OutputChannel
 from rasa.core.nlg import NaturalLanguageGenerator
@@ -48,18 +47,6 @@
 from rasa.core.policies.sklearn_policy import SklearnPolicy
 from rasa.shared.core.trackers import DialogueStateTracker
 from rasa.shared.nlu.training_data.formats.markdown import INTENT
-from rasa.utils.tensorflow.constants import (
-    SIMILARITY_TYPE,
-    RANKING_LENGTH,
-    LOSS_TYPE,
-    SCALE_LOSS,
-    EVAL_NUM_EXAMPLES,
-    KEY_RELATIVE_ATTENTION,
-    VALUE_RELATIVE_ATTENTION,
-    MAX_RELATIVE_POSITION,
-)
-from rasa.train import train_core
-from rasa.utils import train_utils
 from tests.core.conftest import (
     DEFAULT_DOMAIN_PATH_WITH_MAPPING,
     DEFAULT_DOMAIN_PATH_WITH_SLOTS,
@@ -336,321 +323,6 @@ def test_train_with_shuffle_false(
         policy.train(trackers, domain=default_domain, interpreter=RegexInterpreter())
 
 
-class TestTEDPolicy(PolicyTestCollection):
-    def test_train_model_checkpointing(self, tmp_path: Path):
-        model_name = "core-checkpointed-model"
-        best_model_file = tmp_path / (model_name + ".tar.gz")
-        assert not best_model_file.exists()
-
-        train_core(
-            domain="data/test_domains/default.yml",
-            stories="data/test_stories/stories_defaultdomain.md",
-            output=str(tmp_path),
-            fixed_model_name=model_name,
-            config="data/test_config/config_ted_policy_model_checkpointing.yml",
-        )
-
-        assert best_model_file.exists()
-
-    def create_policy(
-        self, featurizer: Optional[TrackerFeaturizer], priority: int
-    ) -> Policy:
-        return TEDPolicy(featurizer=featurizer, priority=priority)
-
-    def test_similarity_type(self, trained_policy: TEDPolicy):
-        assert trained_policy.config[SIMILARITY_TYPE] == "inner"
-
-    def test_ranking_length(self, trained_policy: TEDPolicy):
-        assert trained_policy.config[RANKING_LENGTH] == 10
-
-    def test_normalization(
-        self,
-        trained_policy: TEDPolicy,
-        tracker: DialogueStateTracker,
-        default_domain: Domain,
-        monkeypatch: MonkeyPatch,
-    ):
-        # first check the output is what we expect
-        predicted_probabilities = trained_policy.predict_action_probabilities(
-            tracker, default_domain, RegexInterpreter()
-        ).probabilities
-        # count number of non-zero confidences
-        assert (
-            sum([confidence > 0 for confidence in predicted_probabilities])
-            == trained_policy.config[RANKING_LENGTH]
-        )
-        # check that the norm is still 1
-        assert sum(predicted_probabilities) == pytest.approx(1)
-
-        # also check our function is called
-        mock = Mock()
-        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
-        trained_policy.predict_action_probabilities(
-            tracker, default_domain, RegexInterpreter()
-        )
-
-        mock.normalize.assert_called_once()
-
-    async def test_gen_batch(self, trained_policy: TEDPolicy, default_domain: Domain):
-        training_trackers = await train_trackers(default_domain, augmentation_factor=0)
-        interpreter = RegexInterpreter()
-        training_data, label_ids = trained_policy.featurize_for_training(
-            training_trackers, default_domain, interpreter
-        )
-        label_data, all_labels = trained_policy._create_label_data(
-            default_domain, interpreter
-        )
-        model_data = trained_policy._create_model_data(
-            training_data, label_ids, all_labels
-        )
-        batch_size = 2
-        (
-            batch_label_ids,
-            batch_entities_mask,
-            batch_entities_sentence_1,
-            batch_entities_sentence_2,
-            batch_entities_sentence_3,
-            batch_intent_mask,
-            batch_intent_sentence_1,
-            batch_intent_sentence_2,
-            batch_intent_sentence_3,
-            batch_slots_mask,
-            batch_slots_sentence_1,
-            batch_slots_sentence_2,
-            batch_slots_sentence_3,
-            batch_action_name_mask,
-            batch_action_name_sentence_1,
-            batch_action_name_sentence_2,
-            batch_action_name_sentence_3,
-            batch_dialogue_length,
-        ) = next(model_data._gen_batch(batch_size=batch_size))
-
-        assert (
-            batch_intent_mask.shape[0] == batch_size
-            and batch_action_name_mask.shape[0] == batch_size
-            and batch_entities_mask.shape[0] == batch_size
-            and batch_slots_mask.shape[0] == batch_size
-        )
-        assert (
-            batch_intent_sentence_3[1]
-            == batch_action_name_sentence_3[1]
-            == batch_entities_sentence_3[1]
-            == batch_slots_sentence_3[1]
-        )
-
-        (
-            batch_label_ids,
-            batch_entities_mask,
-            batch_entities_sentence_1,
-            batch_entities_sentence_2,
-            batch_entities_sentence_3,
-            batch_intent_mask,
-            batch_intent_sentence_1,
-            batch_intent_sentence_2,
-            batch_intent_sentence_3,
-            batch_slots_mask,
-            batch_slots_sentence_1,
-            batch_slots_sentence_2,
-            batch_slots_sentence_3,
-            batch_action_name_mask,
-            batch_action_name_sentence_1,
-            batch_action_name_sentence_2,
-            batch_action_name_sentence_3,
-            batch_dialogue_length,
-        ) = next(
-            model_data._gen_batch(
-                batch_size=batch_size, batch_strategy="balanced", shuffle=True
-            )
-        )
-
-        assert (
-            batch_intent_mask.shape[0] == batch_size
-            and batch_action_name_mask.shape[0] == batch_size
-            and batch_entities_mask.shape[0] == batch_size
-            and batch_slots_mask.shape[0] == batch_size
-        )
-        assert (
-            batch_intent_sentence_3[1]
-            == batch_action_name_sentence_3[1]
-            == batch_entities_sentence_3[1]
-            == batch_slots_sentence_3[1]
-        )
-
-
-class TestTEDPolicyMargin(TestTEDPolicy):
-    def create_policy(
-        self, featurizer: Optional[TrackerFeaturizer], priority: int
-    ) -> Policy:
-        return TEDPolicy(
-            featurizer=featurizer, priority=priority, **{LOSS_TYPE: "margin"}
-        )
-
-    def test_similarity_type(self, trained_policy: TEDPolicy):
-        assert trained_policy.config[SIMILARITY_TYPE] == "cosine"
-
-    def test_normalization(
-        self,
-        trained_policy: Policy,
-        tracker: DialogueStateTracker,
-        default_domain: Domain,
-        monkeypatch: MonkeyPatch,
-    ):
-        # Mock actual normalization method
-        mock = Mock()
-        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
-        trained_policy.predict_action_probabilities(
-            tracker, default_domain, RegexInterpreter()
-        )
-
-        # function should not get called for margin loss_type
-        mock.normalize.assert_not_called()
-
-
-class TestTEDPolicyWithEval(TestTEDPolicy):
-    def create_policy(
-        self, featurizer: Optional[TrackerFeaturizer], priority: int
-    ) -> Policy:
-        return TEDPolicy(
-            featurizer=featurizer,
-            priority=priority,
-            **{SCALE_LOSS: False, EVAL_NUM_EXAMPLES: 4},
-        )
-
-
-class TestTEDPolicyNoNormalization(TestTEDPolicy):
-    def create_policy(
-        self, featurizer: Optional[TrackerFeaturizer], priority: int
-    ) -> Policy:
-        return TEDPolicy(
-            featurizer=featurizer, priority=priority, **{RANKING_LENGTH: 0}
-        )
-
-    def test_ranking_length(self, trained_policy: TEDPolicy):
-        assert trained_policy.config[RANKING_LENGTH] == 0
-
-    def test_normalization(
-        self,
-        trained_policy: Policy,
-        tracker: DialogueStateTracker,
-        default_domain: Domain,
-        monkeypatch: MonkeyPatch,
-    ):
-        # first check the output is what we expect
-        predicted_probabilities = trained_policy.predict_action_probabilities(
-            tracker, default_domain, RegexInterpreter()
-        ).probabilities
-        # there should be no normalization
-        assert all([confidence > 0 for confidence in predicted_probabilities])
-
-        # also check our function is not called
-        mock = Mock()
-        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
-        trained_policy.predict_action_probabilities(
-            tracker, default_domain, RegexInterpreter()
-        )
-
-        mock.normalize.assert_not_called()
-
-
-class TestTEDPolicyLowRankingLength(TestTEDPolicy):
-    def create_policy(
-        self, featurizer: Optional[TrackerFeaturizer], priority: int
-    ) -> Policy:
-        return TEDPolicy(
-            featurizer=featurizer, priority=priority, **{RANKING_LENGTH: 3}
-        )
-
-    def test_ranking_length(self, trained_policy: TEDPolicy):
-        assert trained_policy.config[RANKING_LENGTH] == 3
-
-
-class TestTEDPolicyHighRankingLength(TestTEDPolicy):
-    def create_policy(
-        self, featurizer: Optional[TrackerFeaturizer], priority: int
-    ) -> Policy:
-        return TEDPolicy(
-            featurizer=featurizer, priority=priority, **{RANKING_LENGTH: 11}
-        )
-
-    def test_ranking_length(self, trained_policy: TEDPolicy):
-        assert trained_policy.config[RANKING_LENGTH] == 11
-
-
-class TestTEDPolicyWithStandardFeaturizer(TestTEDPolicy):
-    def create_policy(
-        self, featurizer: Optional[TrackerFeaturizer], priority: int
-    ) -> Policy:
-        # use standard featurizer from TEDPolicy,
-        # since it is using MaxHistoryTrackerFeaturizer
-        # if max_history is not specified
-        return TEDPolicy(priority=priority)
-
-    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
-        assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer)
-        assert isinstance(
-            trained_policy.featurizer.state_featurizer, SingleStateFeaturizer
-        )
-        trained_policy.persist(str(tmp_path))
-        loaded = trained_policy.__class__.load(str(tmp_path))
-        assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer)
-        assert isinstance(loaded.featurizer.state_featurizer, SingleStateFeaturizer)
-
-
-class TestTEDPolicyWithMaxHistory(TestTEDPolicy):
-    def create_policy(
-        self, featurizer: Optional[TrackerFeaturizer], priority: int
-    ) -> Policy:
-        # use standard featurizer from TEDPolicy,
-        # since it is using MaxHistoryTrackerFeaturizer
-        # if max_history is specified
-        return TEDPolicy(priority=priority, max_history=self.max_history)
-
-    def test_featurizer(self, trained_policy: Policy, tmp_path: Path):
-        assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer)
-        assert trained_policy.featurizer.max_history == self.max_history
-        assert isinstance(
-            trained_policy.featurizer.state_featurizer, SingleStateFeaturizer
-        )
-        trained_policy.persist(str(tmp_path))
-        loaded = trained_policy.__class__.load(str(tmp_path))
-        assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer)
-        assert loaded.featurizer.max_history == self.max_history
-        assert isinstance(loaded.featurizer.state_featurizer, SingleStateFeaturizer)
-
-
-class TestTEDPolicyWithRelativeAttention(TestTEDPolicy):
-    def create_policy(
-        self, featurizer: Optional[TrackerFeaturizer], priority: int
-    ) -> Policy:
-        return TEDPolicy(
-            featurizer=featurizer,
-            priority=priority,
-            **{
-                KEY_RELATIVE_ATTENTION: True,
-                VALUE_RELATIVE_ATTENTION: True,
-                MAX_RELATIVE_POSITION: 5,
-            },
-        )
-
-
-class TestTEDPolicyWithRelativeAttentionMaxHistoryOne(TestTEDPolicy):
-
-    max_history = 1
-
-    def create_policy(
-        self, featurizer: Optional[TrackerFeaturizer], priority: int
-    ) -> Policy:
-        return TEDPolicy(
-            featurizer=featurizer,
-            priority=priority,
-            **{
-                KEY_RELATIVE_ATTENTION: True,
-                VALUE_RELATIVE_ATTENTION: True,
-                MAX_RELATIVE_POSITION: 5,
-            },
-        )
-
-
 class TestMemoizationPolicy(PolicyTestCollection):
     def create_policy(
         self, featurizer: Optional[TrackerFeaturizer], priority: int

From 03f9e79f2933aab28196c89b96d7225772dca879 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Mon, 14 Dec 2020 12:17:30 +0100
Subject: [PATCH 078/102] Remove blanks

---
 rasa/shared/nlu/training_data/message.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index 49118193d758..cd9ebf1c1a58 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -54,7 +54,7 @@ def add_features(self, features: Optional["Features"]) -> None:
 
     def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
         """Adds diagnostic data from the `origin` component.
-        
+
         Args:
             origin: Name of the component that created the data.
             data: The diagnostic data.

From bfc4f7e4932956d58c62588b5ccff096705086db Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Mon, 14 Dec 2020 12:23:48 +0100
Subject: [PATCH 079/102] Improve some doc strings

---
 rasa/nlu/components.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py
index 23363f194699..a2b819a13145 100644
--- a/rasa/nlu/components.py
+++ b/rasa/nlu/components.py
@@ -390,7 +390,7 @@ class Component(metaclass=ComponentMetaclass):
 
     @property
     def name(self) -> Text:
-        """Name of the component to be used in the model configuration.
+        """Returns the name of the component to be used in the model configuration.
 
         Component class name is used when integrating it in a
         pipeline. E.g. `[ComponentA, ComponentB]`
@@ -412,7 +412,7 @@ def unique_name(self) -> Text:
 
     @classmethod
     def required_components(cls) -> List[Type["Component"]]:
-        """Specify which components need to be present in the pipeline.
+        """Specifies which components need to be present in the pipeline.
 
         Which components are required by this component.
         Listed components should appear before the component itself in the pipeline.
@@ -464,7 +464,7 @@ def __init__(self, component_config: Optional[Dict[Text, Any]] = None) -> None:
 
     @classmethod
     def required_packages(cls) -> List[Text]:
-        """Specify which python packages need to be installed.
+        """Specifies which python packages need to be installed.
 
         E.g. ``["spacy"]``. More specifically, these should be
         importable python package names e.g. `sklearn` and not package
@@ -487,7 +487,7 @@ def load(
         cached_component: Optional["Component"] = None,
         **kwargs: Any,
     ) -> "Component":
-        """Load this component from file.
+        """Loads this component from file.
 
         After a component has been trained, it will be persisted by
         calling `persist`. When the pipeline gets loaded again,
@@ -534,7 +534,7 @@ def create(
         return cls(component_config)
 
     def provide_context(self) -> Optional[Dict[Text, Any]]:
-        """Initialize this component for a new pipeline.
+        """Initializes this component for a new pipeline.
 
         This function will be called before the training
         is started and before the first message is processed using
@@ -557,7 +557,7 @@ def train(
         config: Optional[RasaNLUModelConfig] = None,
         **kwargs: Any,
     ) -> None:
-        """Train this component.
+        """Trains this component.
 
         This is the components chance to train itself provided
         with the training data. The component can rely on
@@ -569,15 +569,13 @@ def train(
         of components previous to this one.
 
         Args:
-            training_data:
-                The :class:`rasa.shared.nlu.training_data.training_data.TrainingData`.
+            training_data: The :class:`rasa.shared.nlu.training_data.training_data.TrainingData`.
             config: The model configuration parameters.
-
         """
         pass
 
     def process(self, message: Message, **kwargs: Any) -> None:
-        """Process an incoming message.
+        """Processes an incoming message.
 
         This is the components chance to process an incoming
         message. The component can rely on
@@ -594,7 +592,7 @@ def process(self, message: Message, **kwargs: Any) -> None:
         pass
 
     def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
-        """Persist this component to disk for future loading.
+        """Persists this component to disk for future loading.
 
         Args:
             file_name: The file name of the model.

From e49fec78a3d5044f4ff6b401e01085852cdf1e4c Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 15 Dec 2020 10:34:39 +0100
Subject: [PATCH 080/102] Add test and minor improvements

---
 changelog/5673.improvement.md        |  6 ++---
 rasa/core/policies/ted_policy.py     | 11 ++++++++-
 tests/utils/tensorflow/test_numpy.py | 37 ++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 4 deletions(-)
 create mode 100644 tests/utils/tensorflow/test_numpy.py

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index 56f7a70e0e64..3d9ff009d570 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -1,9 +1,9 @@
 Expose diagnostic data for action and NLU predictions.
 
-Add `diagnostic_data` field to the [Message](https://rasa.com/docs/rasa/reference/rasa/shared/nlu/training_data/message#message-objects) 
-and [Prediction](https://rasa.com/docs/rasa/reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
+Add `diagnostic_data` field to the [Message](/reference/rasa/shared/nlu/training_data/message#message-objects) 
+and [Prediction](/reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
 information about attention weights and other intermediate results of the inference computation.
-This infromation can be used for debugging and fine-tuning, e.g. with RasaLit.
+This information can be used for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
 
 You can access diagnostic data for DIET like this (please define the `YOUR_RASA_MODEL_DIRECTORY` and `YOUR_RASA_MODEL_NAME` constants):
 
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index ab8df31abea7..a0723e13948b 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -671,7 +671,16 @@ def _create_all_labels_embed(self) -> Tuple[tf.Tensor, tf.Tensor]:
     def _emebed_dialogue(
         self, dialogue_in: tf.Tensor, sequence_lengths: tf.Tensor
     ) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor]]:
-        """Create dialogue level embedding and mask."""
+        """Creates dialogue level embedding and mask.
+
+        Args:
+            dialogue_in: The encoded dialogue.
+            sequence_lengths: Length of each dialogue.
+
+        Returns:
+            The dialogue embedding, the mask, and (for diagnostic purposes) 
+            also the attention weights.
+        """
         mask = self._compute_mask(sequence_lengths)
 
         dialogue_transformed, attention_weights = self._tf_layers[
diff --git a/tests/utils/tensorflow/test_numpy.py b/tests/utils/tensorflow/test_numpy.py
new file mode 100644
index 000000000000..19ebf2dfca54
--- /dev/null
+++ b/tests/utils/tensorflow/test_numpy.py
@@ -0,0 +1,37 @@
+import pytest
+import tensorflow as tf
+import numpy as np
+import rasa.utils.tensorflow.numpy
+import json
+from typing import Optional, Dict, Any
+
+
+class NumpyEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, np.ndarray):
+            return obj.tolist()
+        return json.JSONEncoder.default(self, obj)
+
+
+@pytest.mark.parametrize(
+    "value, expected_result",
+    [
+        ({}, {}),
+        ({"a": 1}, {"a": 1}),
+        ({"a": tf.zeros((2, 3))}, {"a": np.zeros((2, 3))}),
+    ],
+)
+def test_values_to_numpy(
+    value: Optional[Dict[Any, Any]], expected_result: Optional[Dict[Any, Any]]
+):
+    """Tests that `values_to_numpy` yields the expected value types and content."""
+    actual_result = rasa.utils.tensorflow.numpy.values_to_numpy(value)
+    actual_result_value_types = [type(v) for v in sorted(actual_result.values())]
+    # The expected types must come from `expected_result`; deriving them from
+    # `actual_result` would make the type assertion below trivially true.
+    expected_result_value_types = [type(v) for v in sorted(expected_result.values())]
+    assert actual_result_value_types == expected_result_value_types
+    # Arrays are compared through their JSON serialization (see `NumpyEncoder`).
+    assert json.dumps(actual_result, sort_keys=True, cls=NumpyEncoder) == json.dumps(
+        expected_result, sort_keys=True, cls=NumpyEncoder
+    )

From 78ee2c799ccda2f05b6d62f9bf887c5d3aa7ac16 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 15 Dec 2020 10:56:10 +0100
Subject: [PATCH 081/102] Update relative paths

---
 changelog/5673.improvement.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index 3d9ff009d570..ec281d4802f5 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -1,7 +1,7 @@
 Expose diagnostic data for action and NLU predictions.
 
-Add `diagnostic_data` field to the [Message](/reference/rasa/shared/nlu/training_data/message#message-objects) 
-and [Prediction](/reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
+Add `diagnostic_data` field to the [Message](./reference/rasa/shared/nlu/training_data/message#message-objects) 
+and [Prediction](./reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
 information about attention weights and other intermediate results of the inference computation.
 This information can be used for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
 

From 214956ad399abb6ab64b58f95c377ea49ae2adef Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 15 Dec 2020 10:57:35 +0100
Subject: [PATCH 082/102] Update relative paths

---
 changelog/5673.improvement.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index ec281d4802f5..f3c8a20b51d6 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -1,7 +1,7 @@
 Expose diagnostic data for action and NLU predictions.
 
-Add `diagnostic_data` field to the [Message](./reference/rasa/shared/nlu/training_data/message#message-objects) 
-and [Prediction](./reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
+Add `diagnostic_data` field to the [Message](../docs/reference/rasa/shared/nlu/training_data/message#message-objects) 
+and [Prediction](../docs/reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
 information about attention weights and other intermediate results of the inference computation.
 This information can be used for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
 

From 644199c49dddc99d4c4dfb2d0649c599271a5f17 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <jem-mosig@protonmail.com>
Date: Tue, 15 Dec 2020 12:21:41 +0100
Subject: [PATCH 083/102] Update relative paths

---
 changelog/5673.improvement.md    | 4 ++--
 rasa/core/policies/ted_policy.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index f3c8a20b51d6..ec281d4802f5 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -1,7 +1,7 @@
 Expose diagnostic data for action and NLU predictions.
 
-Add `diagnostic_data` field to the [Message](../docs/reference/rasa/shared/nlu/training_data/message#message-objects) 
-and [Prediction](../docs/reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
+Add `diagnostic_data` field to the [Message](./reference/rasa/shared/nlu/training_data/message#message-objects) 
+and [Prediction](./reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
 information about attention weights and other intermediate results of the inference computation.
 This information can be used for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
 
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
index 60480bb71d1a..c15c4f9f91b7 100644
--- a/rasa/core/policies/ted_policy.py
+++ b/rasa/core/policies/ted_policy.py
@@ -697,7 +697,7 @@ def _emebed_dialogue(
             sequence_lengths: Length of each dialogue.
 
         Returns:
-            The dialogue embedding, the mask, and (for diagnostic purposes) 
+            The dialogue embedding, the mask, and (for diagnostic purposes)
             also the attention weights.
         """
         mask = self._compute_mask(sequence_lengths)

From 26a43b050907ecf0edac8ea120e6c0bca951433e Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Tue, 5 Jan 2021 11:37:17 +0100
Subject: [PATCH 084/102] Draft component index fingerprint bug fix

---
 rasa/model.py      | 18 ++++++++++++++++++
 rasa/nlu/config.py |  7 ++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/rasa/model.py b/rasa/model.py
index ff057cf08596..562bd4f6641d 100644
--- a/rasa/model.py
+++ b/rasa/model.py
@@ -314,6 +314,8 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin
     stories = await file_importer.get_stories()
     nlu_data = await file_importer.get_nlu_data()
 
+    update_component_indices_of_pipeline(config)
+
     responses = domain.templates
 
     # Do a copy of the domain to not change the actual domain (shallow is enough)
@@ -374,6 +376,8 @@ def _get_fingerprint_of_config_without_epochs(
             for p in copied_config[key]:
                 if "epochs" in p:
                     del p["epochs"]
+                # if COMPONENT_INDEX in p:  # ToDo: Remove comment
+                #     del p[COMPONENT_INDEX]
 
     return rasa.shared.utils.io.deep_container_fingerprint(copied_config)
 
@@ -616,3 +620,17 @@ def get_model_for_finetuning(
         "contains no model or model file cannot be found."
     )
     return None
+
+
+def update_component_indices_of_pipeline(
+    config: Optional[Dict[Text, Any]] = None
+) -> None:
+    """Adds component indices to the pipeline in the given config.
+    
+    Args:
+        config: Configuration.
+    """
+    if not config or not "pipeline" in config:
+        return
+    for index, component in enumerate(config["pipeline"]):
+        component["index"] = index
diff --git a/rasa/nlu/config.py b/rasa/nlu/config.py
index 9ce4a2811354..2b238b508f8a 100644
--- a/rasa/nlu/config.py
+++ b/rasa/nlu/config.py
@@ -162,6 +162,11 @@ def set_component_attr(self, index, **kwargs) -> None:
                 docs=DOCS_URL_PIPELINE,
             )
 
-    def override(self, config) -> None:
+    def override(self, config: Optional[Dict[Text, Any]] = None) -> None:
+        """Overrides default config with given values.
+
+        Args:
+            config: New values for the configuration.
+        """
         if config:
             self.__dict__.update(config)

From 5fd02cab34db9e3b4db54125572f578bb0b16d0b Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Wed, 6 Jan 2021 12:45:33 +0100
Subject: [PATCH 085/102] Fix component index problem

---
 rasa/model.py      | 18 ------------------
 rasa/nlu/config.py |  3 ++-
 2 files changed, 2 insertions(+), 19 deletions(-)

diff --git a/rasa/model.py b/rasa/model.py
index 562bd4f6641d..ff057cf08596 100644
--- a/rasa/model.py
+++ b/rasa/model.py
@@ -314,8 +314,6 @@ async def model_fingerprint(file_importer: "TrainingDataImporter") -> Fingerprin
     stories = await file_importer.get_stories()
     nlu_data = await file_importer.get_nlu_data()
 
-    update_component_indices_of_pipeline(config)
-
     responses = domain.templates
 
     # Do a copy of the domain to not change the actual domain (shallow is enough)
@@ -376,8 +374,6 @@ def _get_fingerprint_of_config_without_epochs(
             for p in copied_config[key]:
                 if "epochs" in p:
                     del p["epochs"]
-                # if COMPONENT_INDEX in p:  # ToDo: Remove comment
-                #     del p[COMPONENT_INDEX]
 
     return rasa.shared.utils.io.deep_container_fingerprint(copied_config)
 
@@ -620,17 +616,3 @@ def get_model_for_finetuning(
         "contains no model or model file cannot be found."
     )
     return None
-
-
-def update_component_indices_of_pipeline(
-    config: Optional[Dict[Text, Any]] = None
-) -> None:
-    """Adds component indices to the pipeline in the given config.
-    
-    Args:
-        config: Configuration.
-    """
-    if not config or not "pipeline" in config:
-        return
-    for index, component in enumerate(config["pipeline"]):
-        component["index"] = index
diff --git a/rasa/nlu/config.py b/rasa/nlu/config.py
index 2b238b508f8a..ba4653de101d 100644
--- a/rasa/nlu/config.py
+++ b/rasa/nlu/config.py
@@ -1,3 +1,4 @@
+import copy
 import logging
 import os
 from typing import Any, Dict, List, Optional, Text, Union
@@ -69,7 +70,7 @@ def component_config_from_pipeline(
         by the given defaults.
     """
     try:
-        configuration = pipeline[index]
+        configuration = copy.deepcopy(pipeline[index])
         configuration[COMPONENT_INDEX] = index
         return rasa.utils.train_utils.override_defaults(defaults, configuration)
     except IndexError:

From 65e7fd186eb0ff4d727d8212cb2ef23fa57c313b Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Wed, 6 Jan 2021 13:32:06 +0100
Subject: [PATCH 086/102] Update relative paths

---
 changelog/5673.improvement.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index ec281d4802f5..a1f457534b9c 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -1,7 +1,7 @@
 Expose diagnostic data for action and NLU predictions.
 
-Add `diagnostic_data` field to the [Message](./reference/rasa/shared/nlu/training_data/message#message-objects) 
-and [Prediction](./reference/rasa/core/policies/policy#policyprediction-objects) objects, which contain 
+Add `diagnostic_data` field to the [Message](./reference/rasa/shared/nlu/training_data/message) 
+and [Prediction](./reference/rasa/core/policies/ensemble#prediction-objects) objects, which contain 
 information about attention weights and other intermediate results of the inference computation.
 This information can be used for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
 

From a97c97d549c5867bcb2a224ed6b74b808a57b949 Mon Sep 17 00:00:00 2001
From: m-vdb <m.verger@rasa.com>
Date: Wed, 6 Jan 2021 14:25:29 +0100
Subject: [PATCH 087/102] fix changelog links

---
 changelog/5673.improvement.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index a1f457534b9c..90f0c93996ff 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -1,7 +1,7 @@
 Expose diagnostic data for action and NLU predictions.
 
-Add `diagnostic_data` field to the [Message](./reference/rasa/shared/nlu/training_data/message) 
-and [Prediction](./reference/rasa/core/policies/ensemble#prediction-objects) objects, which contain 
+Add `diagnostic_data` field to the [Message](./reference/rasa/shared/nlu/training_data/message.md)
+and [Prediction](./reference/rasa/core/policies/ensemble.md#prediction-objects) objects, which contain
 information about attention weights and other intermediate results of the inference computation.
 This information can be used for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
 

From 511c9d83689997c3b75056d623fc7331bdaf55ec Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Wed, 6 Jan 2021 14:45:39 +0100
Subject: [PATCH 088/102] Fix relative paths again

---
 changelog/5673.improvement.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index 90f0c93996ff..95f047ff1d69 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -1,7 +1,7 @@
 Expose diagnostic data for action and NLU predictions.
 
-Add `diagnostic_data` field to the [Message](./reference/rasa/shared/nlu/training_data/message.md)
-and [Prediction](./reference/rasa/core/policies/ensemble.md#prediction-objects) objects, which contain
+Add `diagnostic_data` field to the [Message](./reference/rasa/shared/nlu/training_data/message.md#message-objects)
+and [Prediction](./reference/rasa/core/policies/ensemble.md#policyprediction-objects) objects, which contain
 information about attention weights and other intermediate results of the inference computation.
 This information can be used for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
 

From 885380135da533c230f53c39faf1f34b58eb1904 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Wed, 6 Jan 2021 15:10:37 +0100
Subject: [PATCH 089/102] Another relative paths fix

---
 changelog/5673.improvement.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index 95f047ff1d69..b224c30091ac 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -1,7 +1,7 @@
 Expose diagnostic data for action and NLU predictions.
 
 Add `diagnostic_data` field to the [Message](./reference/rasa/shared/nlu/training_data/message.md#message-objects)
-and [Prediction](./reference/rasa/core/policies/ensemble.md#policyprediction-objects) objects, which contain
+and [Prediction](./reference/rasa/core/policies/policy.md#policyprediction-objects) objects, which contain
 information about attention weights and other intermediate results of the inference computation.
 This information can be used for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
 

From b600da77757af14d5e6c18fa41d64dfde5d5853e Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Wed, 6 Jan 2021 16:47:45 +0100
Subject: [PATCH 090/102] Fix typos

---
 tests/nlu/classifiers/test_diet_classifier.py | 2 +-
 tests/nlu/selectors/test_selectors.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 195068b53a18..1b7ed3eda8ad 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -509,7 +509,7 @@ async def test_train_persist_load_with_composite_entities(
 async def test_process_gives_diagnostic_data(
     component_builder: ComponentBuilder, tmp_path: Path
 ):
-    """Test if processing a message returns attention weights as numpy array."""
+    """Tests if processing a message returns attention weights as numpy array."""
 
     _config = RasaNLUModelConfig(
         {
diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index e03fae60a905..98a6afb719b6 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -290,7 +290,7 @@ async def test_train_persist_load(component_builder: ComponentBuilder, tmpdir: P
 async def test_process_gives_diagnostic_data(
     component_builder: ComponentBuilder, tmp_path: Path
 ):
-    """Test if processing a message returns attention weights as numpy array."""
+    """Tests if processing a message returns attention weights as numpy array."""
 
     _config = RasaNLUModelConfig(
         {

From 9d93f547cdb07534218e41c6911922d48b4b3214 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Mon, 11 Jan 2021 18:46:21 +0100
Subject: [PATCH 091/102] Add a test

---
 rasa/nlu/classifiers/diet_classifier.py        | 1 +
 rasa/shared/nlu/training_data/message.py       | 2 +-
 tests/shared/nlu/training_data/test_message.py | 7 +++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py
index 2b671d2cf911..67eb8764516a 100644
--- a/rasa/nlu/classifiers/diet_classifier.py
+++ b/rasa/nlu/classifiers/diet_classifier.py
@@ -13,6 +13,7 @@
 import rasa.shared.utils.io
 import rasa.utils.io as io_utils
 import rasa.nlu.utils.bilou_utils as bilou_utils
+import rasa.utils.tensorflow.numpy
 from rasa.shared.constants import DIAGNOSTIC_DATA
 from rasa.nlu.featurizers.featurizer import Featurizer
 from rasa.nlu.components import Component
diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py
index 4652ea00c7a1..e2cb697124fb 100644
--- a/rasa/shared/nlu/training_data/message.py
+++ b/rasa/shared/nlu/training_data/message.py
@@ -62,7 +62,7 @@ def add_diagnostic_data(self, origin: Text, data: Dict[Text, Any]) -> None:
         if origin in self.get(DIAGNOSTIC_DATA, {}):
             rasa.shared.utils.io.raise_warning(
                 f"Please make sure every pipeline component has a distinct name. "
-                f"The name '{self.name}' appears at least twice and diagnostic "
+                f"The name '{origin}' appears at least twice and diagnostic "
                 f"data will be overwritten."
             )
         self.data.setdefault(DIAGNOSTIC_DATA, {})
diff --git a/tests/shared/nlu/training_data/test_message.py b/tests/shared/nlu/training_data/test_message.py
index 25ad054668f9..cb26a0dc022c 100644
--- a/tests/shared/nlu/training_data/test_message.py
+++ b/tests/shared/nlu/training_data/test_message.py
@@ -266,3 +266,10 @@ def test_is_core_or_domain_message(
     message: Message, result: bool,
 ):
     assert result == message.is_core_or_domain_message()
+
+
+def test_add_diagnostic_data_with_repeated_component_raises_warning():
+    message = Message()
+    message.add_diagnostic_data("a", {})
+    with pytest.warns(UserWarning):
+        message.add_diagnostic_data("a", {})

From 4a0bd38e8b1399f77ff91a5787ccd4e71b086935 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Fri, 15 Jan 2021 11:44:10 +0100
Subject: [PATCH 092/102] Add documentation

---
 docs/docs/tuning-your-model.mdx | 92 +++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/docs/docs/tuning-your-model.mdx b/docs/docs/tuning-your-model.mdx
index 63dcde2ad299..c225c86b84b1 100644
--- a/docs/docs/tuning-your-model.mdx
+++ b/docs/docs/tuning-your-model.mdx
@@ -364,3 +364,95 @@ You may want to limit the absolute amount of GPU memory that can be used by a Ra
 
 For example, say you have two visible GPUs(`GPU:0` and `GPU:1`) and you want to allocate 1024 MB from the first GPU
 and 2048 MB from the second GPU. You can do this by setting the environment variable `TF_GPU_MEMORY_ALLOC` to `"0:1024, 1:2048"`.
+
+
+## Accessing Diagnostic Data
+
+To gain a better understanding of what your models do, you can access intermediate results of the prediction process.
+To do this, you need to access the `diagnostic_data` field of the [Message](./reference/rasa/shared/nlu/training_data/message.md#message-objects)
+and [Prediction](./reference/rasa/core/policies/policy.md#policyprediction-objects) objects, which contain
+information about attention weights and other intermediate results of the inference computation.
+You can use this information for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
+
+After you've [trained a model](./reference/rasa/command-line-interface.md#rasa-train), you can access diagnostic data for DIET like this 
+(please define the `YOUR_RASA_MODEL_DIRECTORY` and `YOUR_RASA_MODEL_NAME` constants):
+
+```python
+from rasa.cli.utils import get_validated_path
+from rasa.model import get_model, get_model_subdirectories
+from rasa.nlu.model import Interpreter
+from rasa.shared.nlu.training_data.message import Message
+from rasa.shared.nlu.constants import TEXT
+from rasa.shared.constants import DIAGNOSTIC_DATA
+import pathlib
+
+
+def load_interpreter(model_dir, model):
+    path_str = str(pathlib.Path(model_dir) / model)
+    model = get_validated_path(path_str, "model")
+    model_path = get_model(model)
+    _, nlu_model = get_model_subdirectories(model_path)
+    return Interpreter.load(nlu_model)
+
+
+if __name__ == "__main__":
+    interpreter = load_interpreter(YOUR_RASA_MODEL_DIRECTORY, f"{YOUR_RASA_MODEL_NAME}.tar.gz")
+    data = interpreter.default_output_attributes()
+    data[TEXT] = "hello world"
+    message = Message(data=data)
+    for e in interpreter.pipeline:
+        e.process(message)
+    nlu_diagnostic_data = message.as_dict()[DIAGNOSTIC_DATA]
+
+    for component_name, diagnostic_data in nlu_diagnostic_data.items():
+        print(f"attention_weights for {component_name}:")
+        attention_weights = diagnostic_data["attention_weights"]
+        print(attention_weights)
+
+        print(f"\ntext_transformed for {component_name}:")
+        text_transformed = diagnostic_data["text_transformed"]
+        print(text_transformed)
+
+```
+
+And you can access diagnostic data for TED like this:
+
+```python
+from rasa.core.policies.ted_policy import TEDPolicy
+from rasa.shared.core.constants import ACTION_LISTEN_NAME
+from rasa.shared.core.domain import Domain
+from rasa.shared.core.events import ActionExecuted, UserUttered
+from rasa.shared.core.trackers import DialogueStateTracker
+from rasa.shared.nlu.interpreter import RegexInterpreter
+
+
+UTTER_GREET_ACTION = "utter_greet"
+GREET_INTENT_NAME = "greet"
+DOMAIN_YAML = f"""
+intents:
+- {GREET_INTENT_NAME}
+actions:
+- {UTTER_GREET_ACTION}
+"""
+
+
+if __name__ == "__main__":
+    domain = Domain.from_yaml(DOMAIN_YAML)
+    policy = TEDPolicy()
+    GREET_RULE = DialogueStateTracker.from_events(
+        "greet rule",
+        evts=[
+            UserUttered(intent={"name": GREET_INTENT_NAME}),
+            ActionExecuted(UTTER_GREET_ACTION),
+            ActionExecuted(ACTION_LISTEN_NAME),
+            UserUttered(intent={"name": GREET_INTENT_NAME}),
+            ActionExecuted(ACTION_LISTEN_NAME),
+        ],
+    )
+    policy.train([GREET_RULE], domain, RegexInterpreter())
+    prediction = policy.predict_action_probabilities(
+        GREET_RULE, domain, RegexInterpreter()
+    )
+
+    print(f"{prediction.diagnostic_data.get('attention_weights')}")
+```

From 69b564ec533a159ac0b4995c89238ea6957ee600 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Fri, 15 Jan 2021 19:21:17 +0100
Subject: [PATCH 093/102] Re-use trained model for test

---
 tests/nlu/classifiers/test_diet_classifier.py | 36 ++++++-------------
 1 file changed, 10 insertions(+), 26 deletions(-)

diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 1b7ed3eda8ad..389b5c004ee9 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -5,6 +5,7 @@
 from unittest.mock import Mock
 from typing import List, Text, Dict, Any
 
+import rasa
 from rasa.shared.nlu.training_data.features import Features
 from rasa.nlu import train
 from rasa.nlu.classifiers import LABEL_RANKING_LENGTH
@@ -38,6 +39,7 @@
 from rasa.shared.constants import DIAGNOSTIC_DATA
 from tests.conftest import DEFAULT_NLU_DATA
 from tests.nlu.conftest import DEFAULT_DATA_PATH
+from rasa.core.agent import Agent
 
 
 def test_compute_default_label_features():
@@ -507,41 +509,23 @@ async def test_train_persist_load_with_composite_entities(
 
 
 async def test_process_gives_diagnostic_data(
-    component_builder: ComponentBuilder, tmp_path: Path
+    component_builder: ComponentBuilder, trained_nlu_moodbot_path: Text,
 ):
     """Tests if processing a message returns attention weights as numpy array."""
-
-    _config = RasaNLUModelConfig(
-        {
-            "pipeline": [
-                {"name": "WhitespaceTokenizer"},
-                {"name": "CountVectorsFeaturizer"},
-                {"name": "DIETClassifier", RANDOM_SEED: 1, EPOCHS: 1},
-            ],
-            "language": "en",
-        }
-    )
-
-    (trainer, trained, persisted_path) = await train(
-        _config,
-        path=str(tmp_path),
-        data="data/test/many_intents.md",
-        component_builder=component_builder,
-    )
-
-    assert trainer.pipeline
-    assert trained.pipeline
-
-    loaded = Interpreter.load(persisted_path, component_builder)
+    with rasa.model.unpack_model(trained_nlu_moodbot_path) as unpacked_model_directory:
+        _, nlu_model_directory = rasa.model.get_model_subdirectories(
+            unpacked_model_directory
+        )
+        interpreter = Interpreter.load(nlu_model_directory, component_builder)
 
     message = Message(data={TEXT: "hello"})
-    for component in loaded.pipeline:
+    for component in interpreter.pipeline:
         component.process(message)
 
     diagnostic_data = message.get(DIAGNOSTIC_DATA)
 
     # The last component is DIETClassifier, which should add attention weights
-    name = "component_2_DIETClassifier"
+    name = f"component_{len(interpreter.pipeline) - 1}_DIETClassifier"
     assert isinstance(diagnostic_data, dict)
     assert name in diagnostic_data
     assert "attention_weights" in diagnostic_data[name]

From 9c1378f6f288d086419fbb99264cd7a9ebdacd25 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Fri, 15 Jan 2021 19:36:52 +0100
Subject: [PATCH 094/102] Move documentation under common headline

---
 docs/docs/tuning-your-model.mdx | 129 +++++++++-----------------------
 1 file changed, 36 insertions(+), 93 deletions(-)

diff --git a/docs/docs/tuning-your-model.mdx b/docs/docs/tuning-your-model.mdx
index c225c86b84b1..89283a95a459 100644
--- a/docs/docs/tuning-your-model.mdx
+++ b/docs/docs/tuning-your-model.mdx
@@ -293,7 +293,9 @@ Here is a summary of the available extractors and what they are best used for:
 |`MitieEntityExtractor`    |MITIE             |structured SVM                                   |good for training custom entities |
 |`EntitySynonymMapper`     |existing entities |N/A                                              |maps known synonyms               |
 
-## Handling Class Imbalance
+## Improving Performance
+
+### Handling Class Imbalance
 
 Classification algorithms often do not perform well if there is a large class imbalance,
 for example if you have a lot of training data for some intents and very little training data for others.
@@ -312,6 +314,39 @@ pipeline:
   batch_strategy: sequence
 ```
 
+### Accessing Diagnostic Data
+
+To gain a better understanding of what your models do, you can access intermediate results of the prediction process.
+To do this, you need to access the `diagnostic_data` field of the [Message](./reference/rasa/shared/nlu/training_data/message.md#message-objects)
+and [Prediction](./reference/rasa/core/policies/policy.md#policyprediction-objects) objects, which contain
+information about attention weights and other intermediate results of the inference computation.
+You can use this information for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
+
+After you've [trained a model](./command-line-interface.mdx#rasa-train), you can access diagnostic data for DIET,
+given a processed message, like this:
+
+```python
+nlu_diagnostic_data = message.as_dict()[DIAGNOSTIC_DATA]
+
+for component_name, diagnostic_data in nlu_diagnostic_data.items():
+    attention_weights = diagnostic_data["attention_weights"]
+    print(f"attention_weights for {component_name}:")
+    print(attention_weights)
+
+    text_transformed = diagnostic_data["text_transformed"]
+    print(f"\ntext_transformed for {component_name}:")
+    print(text_transformed)
+```
+
+And you can access diagnostic data for TED like this:
+
+```python
+prediction = policy.predict_action_probabilities(
+    GREET_RULE, domain, RegexInterpreter()
+)
+print(f"{prediction.diagnostic_data.get('attention_weights')}")
+```
+
 
 ## Configuring Tensorflow
 
@@ -364,95 +399,3 @@ You may want to limit the absolute amount of GPU memory that can be used by a Ra
 
 For example, say you have two visible GPUs(`GPU:0` and `GPU:1`) and you want to allocate 1024 MB from the first GPU
 and 2048 MB from the second GPU. You can do this by setting the environment variable `TF_GPU_MEMORY_ALLOC` to `"0:1024, 1:2048"`.
-
-
-## Accessing Diagnostic Data
-
-To gain a better understanding of what your models do, you can access intermediate results of the prediction process.
-To do this, you need to access the `diagnostic_data` field of the [Message](./reference/rasa/shared/nlu/training_data/message.md#message-objects)
-and [Prediction](./reference/rasa/core/policies/policy.md#policyprediction-objects) objects, which contain
-information about attention weights and other intermediate results of the inference computation.
-You can use this information for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
-
-After you've [trained a model](./reference/rasa/command-line-interface.md#rasa-train), you can access diagnostic data for DIET like this 
-(please define the `YOUR_RASA_MODEL_DIRECTORY` and `YOUR_RASA_MODEL_NAME` constants):
-
-```python
-from rasa.cli.utils import get_validated_path
-from rasa.model import get_model, get_model_subdirectories
-from rasa.nlu.model import Interpreter
-from rasa.shared.nlu.training_data.message import Message
-from rasa.shared.nlu.constants import TEXT
-from rasa.shared.constants import DIAGNOSTIC_DATA
-import pathlib
-
-
-def load_interpreter(model_dir, model):
-    path_str = str(pathlib.Path(model_dir) / model)
-    model = get_validated_path(path_str, "model")
-    model_path = get_model(model)
-    _, nlu_model = get_model_subdirectories(model_path)
-    return Interpreter.load(nlu_model)
-
-
-if __name__ == "__main__":
-    interpreter = load_interpreter(YOUR_RASA_MODEL_DIRECTORY, f"{YOUR_RASA_MODEL_NAME}.tar.gz")
-    data = interpreter.default_output_attributes()
-    data[TEXT] = "hello world"
-    message = Message(data=data)
-    for e in interpreter.pipeline:
-        e.process(message)
-    nlu_diagnostic_data = message.as_dict()[DIAGNOSTIC_DATA]
-
-    for component_name, diagnostic_data in nlu_diagnostic_data.items():
-        print(f"attention_weights for {component_name}:")
-        attention_weights = diagnostic_data["attention_weights"]
-        print(attention_weights)
-
-        print(f"\ntext_transformed for {component_name}:")
-        text_transformed = diagnostic_data["text_transformed"]
-        print(text_transformed)
-
-```
-
-And you can access diagnostic data for TED like this:
-
-```python
-from rasa.core.policies.ted_policy import TEDPolicy
-from rasa.shared.core.constants import ACTION_LISTEN_NAME
-from rasa.shared.core.domain import Domain
-from rasa.shared.core.events import ActionExecuted, UserUttered
-from rasa.shared.core.trackers import DialogueStateTracker
-from rasa.shared.nlu.interpreter import RegexInterpreter
-
-
-UTTER_GREET_ACTION = "utter_greet"
-GREET_INTENT_NAME = "greet"
-DOMAIN_YAML = f"""
-intents:
-- {GREET_INTENT_NAME}
-actions:
-- {UTTER_GREET_ACTION}
-"""
-
-
-if __name__ == "__main__":
-    domain = Domain.from_yaml(DOMAIN_YAML)
-    policy = TEDPolicy()
-    GREET_RULE = DialogueStateTracker.from_events(
-        "greet rule",
-        evts=[
-            UserUttered(intent={"name": GREET_INTENT_NAME}),
-            ActionExecuted(UTTER_GREET_ACTION),
-            ActionExecuted(ACTION_LISTEN_NAME),
-            UserUttered(intent={"name": GREET_INTENT_NAME}),
-            ActionExecuted(ACTION_LISTEN_NAME),
-        ],
-    )
-    policy.train([GREET_RULE], domain, RegexInterpreter())
-    prediction = policy.predict_action_probabilities(
-        GREET_RULE, domain, RegexInterpreter()
-    )
-
-    print(f"{prediction.diagnostic_data.get('attention_weights')}")
-```

From dc4037f6494c6919df46af53569fbc1dbbda31a2 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Thu, 21 Jan 2021 14:19:26 +0100
Subject: [PATCH 095/102] Move example code to gist

---
 changelog/5673.improvement.md | 82 +----------------------------------
 1 file changed, 1 insertion(+), 81 deletions(-)

diff --git a/changelog/5673.improvement.md b/changelog/5673.improvement.md
index b224c30091ac..1afa952b6674 100644
--- a/changelog/5673.improvement.md
+++ b/changelog/5673.improvement.md
@@ -5,84 +5,4 @@ and [Prediction](./reference/rasa/core/policies/policy.md#policyprediction-objec
 information about attention weights and other intermediate results of the inference computation.
 This information can be used for debugging and fine-tuning, e.g. with [RasaLit](https://github.com/RasaHQ/rasalit).
 
-You can access diagnostic data for DIET like this (please define the `YOUR_RASA_MODEL_DIRECTORY` and `YOUR_RASA_MODEL_NAME` constants):
-
-```python
-from rasa.cli.utils import get_validated_path
-from rasa.model import get_model, get_model_subdirectories
-from rasa.nlu.model import Interpreter
-from rasa.shared.nlu.training_data.message import Message
-from rasa.shared.nlu.constants import TEXT
-from rasa.shared.constants import DIAGNOSTIC_DATA
-import pathlib
-
-
-def load_interpreter(model_dir, model):
-    path_str = str(pathlib.Path(model_dir) / model)
-    model = get_validated_path(path_str, "model")
-    model_path = get_model(model)
-    _, nlu_model = get_model_subdirectories(model_path)
-    return Interpreter.load(nlu_model)
-
-
-if __name__ == "__main__":
-    interpreter = load_interpreter(YOUR_RASA_MODEL_DIRECTORY, f"{YOUR_RASA_MODEL_NAME}.tar.gz")
-    data = interpreter.default_output_attributes()
-    data[TEXT] = "hello world"
-    message = Message(data=data)
-    for e in interpreter.pipeline:
-        e.process(message)
-    nlu_diagnostic_data = message.as_dict()[DIAGNOSTIC_DATA]
-
-    for component_name, diagnostic_data in nlu_diagnostic_data.items():
-        print(f"attention_weights for {component_name}:")
-        attention_weights = diagnostic_data["attention_weights"]
-        print(attention_weights)
-
-        print(f"\ntext_transformed for {component_name}:")
-        text_transformed = diagnostic_data["text_transformed"]
-        print(text_transformed)
-
-```
-
-You can access diagnostic data for TED like this:
-
-```python
-from rasa.core.policies.ted_policy import TEDPolicy
-from rasa.shared.core.constants import ACTION_LISTEN_NAME
-from rasa.shared.core.domain import Domain
-from rasa.shared.core.events import ActionExecuted, UserUttered
-from rasa.shared.core.trackers import DialogueStateTracker
-from rasa.shared.nlu.interpreter import RegexInterpreter
-
-
-UTTER_GREET_ACTION = "utter_greet"
-GREET_INTENT_NAME = "greet"
-DOMAIN_YAML = f"""
-intents:
-- {GREET_INTENT_NAME}
-actions:
-- {UTTER_GREET_ACTION}
-"""
-
-
-if __name__ == "__main__":
-    domain = Domain.from_yaml(DOMAIN_YAML)
-    policy = TEDPolicy()
-    GREET_RULE = DialogueStateTracker.from_events(
-        "greet rule",
-        evts=[
-            UserUttered(intent={"name": GREET_INTENT_NAME}),
-            ActionExecuted(UTTER_GREET_ACTION),
-            ActionExecuted(ACTION_LISTEN_NAME),
-            UserUttered(intent={"name": GREET_INTENT_NAME}),
-            ActionExecuted(ACTION_LISTEN_NAME),
-        ],
-    )
-    policy.train([GREET_RULE], domain, RegexInterpreter())
-    prediction = policy.predict_action_probabilities(
-        GREET_RULE, domain, RegexInterpreter()
-    )
-
-    print(f"{prediction.diagnostic_data.get('attention_weights')}")
-```
+For examples of how to access the diagnostic data, see [here](https://gist.github.com/JEM-Mosig/c6e15b81ee70561cb72e361aff310d7e).

From 75155c53ea6fc651a50200d8bba6e9911ad1c7f5 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Fri, 22 Jan 2021 12:29:47 +0100
Subject: [PATCH 096/102] Use fixture for ResponseSelector test

---
 tests/conftest.py                             | 26 +++++++++
 tests/core/conftest.py                        | 15 ------
 tests/nlu/classifiers/test_diet_classifier.py |  6 +--
 tests/nlu/selectors/test_selectors.py         | 53 ++++++-------------
 4 files changed, 44 insertions(+), 56 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 7af0f8f27ba6..bd573f3c72ba 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -45,6 +45,7 @@
     INCORRECT_NLU_DATA,
     SIMPLE_STORIES_FILE,
 )
+from rasa.shared.exceptions import FileNotFoundException
 
 DEFAULT_CONFIG_PATH = "rasa/cli/default_config.yml"
 
@@ -384,6 +385,31 @@ def blank_config() -> RasaNLUModelConfig:
     return RasaNLUModelConfig({"language": "en", "pipeline": []})
 
 
+@pytest.fixture(scope="session")
+async def trained_responseselectorbot_path(trained_async: Callable) -> Path:
+    zipped_model = await trained_async(
+        domain="examples/responseselectorbot/domain.yml",
+        config="examples/responseselectorbot/config.yml",
+        training_files=[
+            "examples/responseselectorbot/data/rules.yml",
+            "examples/responseselectorbot/data/stories.yml",
+            "examples/responseselectorbot/data/nlu.yml",
+        ],
+    )
+
+    if not zipped_model:
+        raise FileNotFoundException(f"Could not find model {zipped_model}")
+
+    return Path(zipped_model)
+
+
+@pytest.fixture(scope="session")
+async def response_selector_agent(
+    trained_responseselectorbot_path: Optional[Path],
+) -> Agent:
+    return Agent.load_local_model(trained_responseselectorbot_path)
+
+
 def write_endpoint_config_to_yaml(
     path: Path, data: Dict[Text, Any], endpoints_filename: Text = "endpoints.yml"
 ) -> Path:
diff --git a/tests/core/conftest.py b/tests/core/conftest.py
index 0d3c844b9ac0..194c789a7368 100644
--- a/tests/core/conftest.py
+++ b/tests/core/conftest.py
@@ -216,18 +216,3 @@ async def form_bot_agent(trained_async: Callable) -> Agent:
     )
 
     return Agent.load_local_model(zipped_model, action_endpoint=endpoint)
-
-
-@pytest.fixture(scope="session")
-async def response_selector_agent(trained_async: Callable) -> Agent:
-    zipped_model = await trained_async(
-        domain="examples/responseselectorbot/domain.yml",
-        config="examples/responseselectorbot/config.yml",
-        training_files=[
-            "examples/responseselectorbot/data/rules.yml",
-            "examples/responseselectorbot/data/stories.yml",
-            "examples/responseselectorbot/data/nlu.yml",
-        ],
-    )
-
-    return Agent.load_local_model(zipped_model)
diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 389b5c004ee9..2d60eb37bbbe 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -508,15 +508,13 @@ async def test_train_persist_load_with_composite_entities(
     assert loaded.parse(text) == trained.parse(text)
 
 
-async def test_process_gives_diagnostic_data(
-    component_builder: ComponentBuilder, trained_nlu_moodbot_path: Text,
-):
+async def test_process_gives_diagnostic_data(trained_nlu_moodbot_path: Text,):
     """Tests if processing a message returns attention weights as numpy array."""
     with rasa.model.unpack_model(trained_nlu_moodbot_path) as unpacked_model_directory:
         _, nlu_model_directory = rasa.model.get_model_subdirectories(
             unpacked_model_directory
         )
-        interpreter = Interpreter.load(nlu_model_directory, component_builder)
+        interpreter = Interpreter.load(nlu_model_directory)
 
     message = Message(data={TEXT: "hello"})
     for component in interpreter.pipeline:
diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index 98a6afb719b6..13b5744b1bbf 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -4,6 +4,7 @@
 import numpy as np
 from typing import List, Dict, Text, Any
 
+import rasa.model
 from rasa.nlu import train
 from rasa.nlu.components import ComponentBuilder
 from rasa.shared.nlu.training_data import util
@@ -287,52 +288,30 @@ async def test_train_persist_load(component_builder: ComponentBuilder, tmpdir: P
     )
 
 
-async def test_process_gives_diagnostic_data(
-    component_builder: ComponentBuilder, tmp_path: Path
-):
+async def test_process_gives_diagnostic_data(trained_responseselectorbot_path: Path,):
     """Tests if processing a message returns attention weights as numpy array."""
 
-    _config = RasaNLUModelConfig(
-        {
-            "pipeline": [
-                {"name": "WhitespaceTokenizer"},
-                {"name": "CountVectorsFeaturizer"},
-                {
-                    "name": "ResponseSelector",
-                    EPOCHS: 1,
-                    EVAL_NUM_EXAMPLES: 10,
-                    EVAL_NUM_EPOCHS: 1,
-                    NUM_TRANSFORMER_LAYERS: 1,
-                    TRANSFORMER_SIZE: 8,
-                },
-            ],
-            "language": "en",
-        }
-    )
-
-    (trainer, trained, persisted_path) = await train(
-        _config,
-        path=str(tmp_path),
-        data="data/test_selectors",
-        component_builder=component_builder,
-    )
-
-    assert trainer.pipeline
-    assert trained.pipeline
-
-    loaded = Interpreter.load(persisted_path, component_builder)
+    with rasa.model.unpack_model(
+        trained_responseselectorbot_path
+    ) as unpacked_model_directory:
+        _, nlu_model_directory = rasa.model.get_model_subdirectories(
+            unpacked_model_directory
+        )
+        interpreter = Interpreter.load(nlu_model_directory)
 
     message = Message(data={TEXT: "hello"})
-    for component in loaded.pipeline:
+    for component in interpreter.pipeline:
         component.process(message)
 
     diagnostic_data = message.get(DIAGNOSTIC_DATA)
 
-    # The last component is DIETClassifier, which should add attention weights
-    name = "component_2_ResponseSelector"
+    # The last component is ResponseSelector, which should add diagnostic data
+    name = f"component_{len(interpreter.pipeline) - 1}_ResponseSelector"
     assert isinstance(diagnostic_data, dict)
     assert name in diagnostic_data
-    assert "attention_weights" in diagnostic_data[name]
-    assert isinstance(diagnostic_data[name].get("attention_weights"), np.ndarray)
     assert "text_transformed" in diagnostic_data[name]
     assert isinstance(diagnostic_data[name].get("text_transformed"), np.ndarray)
+    # The `attention_weights` key should exist, regardless of there being a transformer
+    assert "attention_weights" in diagnostic_data[name]
+    # By default, ResponseSelector has `number_of_transformer_layers = 0`
+    assert diagnostic_data[name].get("attention_weights") is None

From 244fae0e321c5e795dc255ae60bd6604670306db Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Fri, 22 Jan 2021 12:51:00 +0100
Subject: [PATCH 097/102] Change exception type

---
 tests/conftest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index bd573f3c72ba..673fcb991b68 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -45,7 +45,7 @@
     INCORRECT_NLU_DATA,
     SIMPLE_STORIES_FILE,
 )
-from rasa.shared.exceptions import FileNotFoundException
+from rasa.shared.exceptions import RasaException
 
 DEFAULT_CONFIG_PATH = "rasa/cli/default_config.yml"
 
@@ -398,7 +398,7 @@ async def trained_responseselectorbot_path(trained_async: Callable) -> Path:
     )
 
     if not zipped_model:
-        raise FileNotFoundException(f"Could not find model {zipped_model}")
+        raise RasaException(f"Model training for responseselectorbot failed.")
 
     return Path(zipped_model)
 

From b0d3dbc8313a59e1c854095b2e3e464a78f0392d Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Fri, 22 Jan 2021 12:52:59 +0100
Subject: [PATCH 098/102] Import less

---
 tests/nlu/classifiers/test_diet_classifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py
index 2d60eb37bbbe..456f5c76292c 100644
--- a/tests/nlu/classifiers/test_diet_classifier.py
+++ b/tests/nlu/classifiers/test_diet_classifier.py
@@ -5,7 +5,7 @@
 from unittest.mock import Mock
 from typing import List, Text, Dict, Any
 
-import rasa
+import rasa.model
 from rasa.shared.nlu.training_data.features import Features
 from rasa.nlu import train
 from rasa.nlu.classifiers import LABEL_RANKING_LENGTH

From dbc332f6911444099d354b1bd93cdf4a53abbcfe Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Fri, 22 Jan 2021 13:14:35 +0100
Subject: [PATCH 099/102] Remove f from f-string

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 673fcb991b68..75dcfc77b38c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -398,7 +398,7 @@ async def trained_responseselectorbot_path(trained_async: Callable) -> Path:
     )
 
     if not zipped_model:
-        raise RasaException(f"Model training for responseselectorbot failed.")
+        raise RasaException("Model training for responseselectorbot failed.")
 
     return Path(zipped_model)
 

From 644ec371c0d6b7d3d3eb3fcf064156e7a2e03dfa Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Fri, 22 Jan 2021 18:22:57 +0100
Subject: [PATCH 100/102] Rename `test_process_gives_diagnostic_data`

---
 tests/conftest.py                     | 6 +++---
 tests/nlu/selectors/test_selectors.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 75dcfc77b38c..b17c782f9973 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -386,7 +386,7 @@ def blank_config() -> RasaNLUModelConfig:
 
 
 @pytest.fixture(scope="session")
-async def trained_responseselectorbot_path(trained_async: Callable) -> Path:
+async def trained_response_selector_bot(trained_async: Callable) -> Path:
     zipped_model = await trained_async(
         domain="examples/responseselectorbot/domain.yml",
         config="examples/responseselectorbot/config.yml",
@@ -405,9 +405,9 @@ async def trained_responseselectorbot_path(trained_async: Callable) -> Path:
 
 @pytest.fixture(scope="session")
 async def response_selector_agent(
-    trained_responseselectorbot_path: Optional[Path],
+    trained_response_selector_bot: Optional[Path],
 ) -> Agent:
-    return Agent.load_local_model(trained_responseselectorbot_path)
+    return Agent.load_local_model(trained_response_selector_bot)
 
 
 def write_endpoint_config_to_yaml(
diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py
index 13b5744b1bbf..6d5c4aabea9d 100644
--- a/tests/nlu/selectors/test_selectors.py
+++ b/tests/nlu/selectors/test_selectors.py
@@ -288,11 +288,11 @@ async def test_train_persist_load(component_builder: ComponentBuilder, tmpdir: P
     )
 
 
-async def test_process_gives_diagnostic_data(trained_responseselectorbot_path: Path,):
+async def test_process_gives_diagnostic_data(trained_response_selector_bot: Path):
     """Tests if processing a message returns attention weights as numpy array."""
 
     with rasa.model.unpack_model(
-        trained_responseselectorbot_path
+        trained_response_selector_bot
     ) as unpacked_model_directory:
         _, nlu_model_directory = rasa.model.get_model_subdirectories(
             unpacked_model_directory

From e57575db9c52641be367b8f6b764c7cd0a241fe2 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Tue, 26 Jan 2021 11:15:02 +0100
Subject: [PATCH 101/102] Change test_values_to_numpy

---
 tests/utils/tensorflow/test_numpy.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/tests/utils/tensorflow/test_numpy.py b/tests/utils/tensorflow/test_numpy.py
index 19ebf2dfca54..54712f193055 100644
--- a/tests/utils/tensorflow/test_numpy.py
+++ b/tests/utils/tensorflow/test_numpy.py
@@ -2,17 +2,9 @@
 import tensorflow as tf
 import numpy as np
 import rasa.utils.tensorflow.numpy
-import json
 from typing import Optional, Dict, Any
 
 
-class NumpyEncoder(json.JSONEncoder):
-    def default(self, obj):
-        if isinstance(obj, np.ndarray):
-            return obj.tolist()
-        return json.JSONEncoder.default(self, obj)
-
-
 @pytest.mark.parametrize(
     "value, expected_result",
     [
@@ -32,6 +24,8 @@ def test_values_to_numpy(
         type(value) for value in sorted(actual_result.values())
     ]
     assert actual_result_value_types == expected_result_value_types
-    assert json.dumps(actual_result, sort_keys=True, cls=NumpyEncoder) == json.dumps(
-        expected_result, sort_keys=True, cls=NumpyEncoder
-    )
+    for key, value in actual_result.items():
+        if isinstance(expected_result.get(key), np.ndarray):
+            np.testing.assert_equal(value, expected_result.get(key))
+        else:
+            assert value == expected_result.get(key)

From 1c83a749b09227aff13b86f98fd97348db3e87d0 Mon Sep 17 00:00:00 2001
From: "Johannes E. M. Mosig" <j.mosig@rasa.com>
Date: Tue, 26 Jan 2021 12:16:58 +0100
Subject: [PATCH 102/102] Simplify test_values_to_numpy

---
 tests/utils/tensorflow/test_numpy.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/utils/tensorflow/test_numpy.py b/tests/utils/tensorflow/test_numpy.py
index 54712f193055..4a0544d7ff32 100644
--- a/tests/utils/tensorflow/test_numpy.py
+++ b/tests/utils/tensorflow/test_numpy.py
@@ -24,8 +24,4 @@ def test_values_to_numpy(
         type(value) for value in sorted(actual_result.values())
     ]
     assert actual_result_value_types == expected_result_value_types
-    for key, value in actual_result.items():
-        if isinstance(expected_result.get(key), np.ndarray):
-            np.testing.assert_equal(value, expected_result.get(key))
-        else:
-            assert value == expected_result.get(key)
+    np.testing.assert_equal(actual_result, expected_result)