Commit

apply activation if transformer used
tabergma committed Apr 15, 2020
1 parent 61b6767 commit 9d1802c
Showing 3 changed files with 6 additions and 5 deletions.
2 changes: 1 addition & 1 deletion changelog/5626.msic.rst

@@ -1 +1 @@
-Move ``tfa.activations.gelu(x)`` from ``DIETClassifier`` to transformer block.
+Apply ``tfa.activations.gelu(x)`` only if min 1 transformer block is used in ``DIETClassifier``.
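
For context on the activation this entry refers to: tfa.activations.gelu is the Gaussian Error Linear Unit from TensorFlow Addons, GELU(x) = x * Phi(x). The following is a minimal sketch of the definition, not part of this commit; note the library call may use a tanh approximation depending on its approximate flag.

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])

# reference definition via the Gaussian CDF expressed with erf
exact = 0.5 * x * (1.0 + tf.math.erf(x / np.sqrt(2.0)))

print(tfa.activations.gelu(x).numpy())  # library implementation
print(exact.numpy())                    # reference values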
4 changes: 4 additions & 0 deletions rasa/nlu/classifiers/diet_classifier.py

@@ -1250,6 +1250,10 @@ def _create_sequence(
             inputs, 1 - mask, self._training
         )

+        if self.config[TRANSFORMER_SIZE] > 0:
+            # apply final activation
+            outputs = tfa.activations.gelu(outputs)
+
         return outputs, inputs, seq_ids, lm_mask_bool

     def _create_all_labels(self) -> Tuple[tf.Tensor, tf.Tensor]:
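
A minimal sketch of the guard this hunk introduces, with hypothetical names (encode_sequence, config, TRANSFORMER_SIZE as a plain string key) standing in for the DIETClassifier internals: GELU is applied to the sequence output only when the configured transformer size is positive, i.e. at least one transformer layer exists; with no transformer the features pass through unchanged.

import tensorflow as tf
import tensorflow_addons as tfa

TRANSFORMER_SIZE = "transformer_size"  # assumed config key, for illustration only

def encode_sequence(sequence_features: tf.Tensor, config: dict) -> tf.Tensor:
    outputs = sequence_features
    if config[TRANSFORMER_SIZE] > 0:
        # a real transformer stack would run here; this sketch only mirrors the
        # control flow: GELU is applied solely on the transformer path
        outputs = tfa.activations.gelu(outputs)
    return outputs

features = tf.random.normal((2, 5, 16))
# with transformer_size == 0 the features pass through untouched
assert tf.reduce_all(encode_sequence(features, {TRANSFORMER_SIZE: 0}) == features)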
5 changes: 1 addition & 4 deletions rasa/utils/tensorflow/transformer.py

@@ -626,7 +626,4 @@ def call(
         # if normalization is done in encoding layers, then it should also be done
         # on the output, since the output can grow very large, being the sum of
         # a whole stack of unnormalized layer outputs.
-        normalized_x = self._layer_norm(x)  # (batch_size, length, units)
-
-        # apply final activation
-        return tfa.activations.gelu(normalized_x)
+        return self._layer_norm(x)  # (batch_size, length, units)
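
The comment kept in this hunk explains why the final layer normalization stays inside the encoder even though the GELU no longer does: with pre-layer-norm residual blocks the raw output is a sum of unnormalized layer outputs, so its scale grows with depth. A small illustration under assumed shapes and layer sizes, not the Rasa encoder itself:

import tensorflow as tf

units, num_layers = 16, 8
layer_norms = [tf.keras.layers.LayerNormalization() for _ in range(num_layers)]
dense_layers = [tf.keras.layers.Dense(units) for _ in range(num_layers)]
final_norm = tf.keras.layers.LayerNormalization()

x = tf.random.normal((2, 5, units))
for norm, dense in zip(layer_norms, dense_layers):
    x = x + dense(norm(x))  # pre-LN residual block: unnormalized outputs accumulate

print(float(tf.math.reduce_std(x)))              # typically grows with num_layers
print(float(tf.math.reduce_std(final_norm(x))))  # back to roughly 1 after the final layer norm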
