
Fix norm nd grad #5306

Closed · wants to merge 4 commits
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -76,6 +76,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Changed behavior of `MultiOptimizer` so that while a default optimizer is still required, an error is not thrown if the default optimizer receives no parameters.
- Made the epsilon parameter for the layer normalization in token embeddings configurable.
- Changed the behavior of gradient normalization so that it no longer throws an error when the input sequence is more than 1-D.
- Take the absolute value of the embedding gradient per token before summing and normalizing.

### Removed

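The CHANGELOG entries above describe the fix in one line each. A minimal sketch of the shape issue, assuming an unbatched gradient with an extra sub-token dimension (the shapes and values are illustrative assumptions, not AllenNLP internals):

```python
import numpy

# Toy shape, assumed for illustration: an unbatched gradient with an extra
# sub-token dimension, e.g. (seq_len, num_subtokens, embedding_dim).
grad = numpy.random.randn(3, 4, 5)

# Old approach: sum over a fixed axis; the result is still 2-D, so the old
# list comprehension `[math.fabs(e) / norm for e in ...]` would raise
# TypeError, because each `e` is an array rather than a scalar.
old_sum = numpy.sum(grad, axis=1)                        # shape (3, 5)

# New approach: reduce over the last (embedding) axis and keep dims on the
# norm so the result is an array that broadcasts against the reduced gradient.
new_sum = numpy.sum(numpy.abs(grad), axis=-1)            # shape (3, 4)
norm = numpy.linalg.norm(new_sum, ord=1, keepdims=True)  # shape (1, 1)
normalized = new_sum / norm                              # shape (3, 4), no error
print(old_sum.shape, normalized.shape)
```

The error in the old code came from applying `math.fabs` to array elements of a still-2-D result; the vectorized `numpy.abs` plus a broadcasting division avoids that whether the reduced gradient is 1-D or 2-D.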
@@ -1,4 +1,3 @@
- import math
from typing import List, Dict, Any

import numpy
@@ -30,9 +29,9 @@ def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
# Normalize results
for key, grad in grads.items():
# The [0] here is undo-ing the batching that happens in get_gradients.
- embedding_grad = numpy.sum(grad[0], axis=1)
Contributor comment:
It's not obvious, but this line is also part of a dot product. I don't remember why it's implemented this way, but it might be for consistency across the different interpreters, or there might be some efficiency considerations that I'm not remembering. If you look at line 116 you'll see the first half of the dot product computation.

- norm = numpy.linalg.norm(embedding_grad, ord=1)
- normalized_grad = [math.fabs(e) / norm for e in embedding_grad]
+ embedding_grad = numpy.sum(numpy.abs(grad[0]), axis=-1)
+ norm = numpy.linalg.norm(embedding_grad, ord=1, keepdims=True)
+ normalized_grad = embedding_grad / norm
grads[key] = normalized_grad

instances_with_grads["instance_" + str(idx + 1)] = grads
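The contributor comment above says the summed line is the second half of a dot product, with the gradient already multiplied elementwise by the embedding earlier in this interpreter. A toy sketch of why moving the absolute value inside the sum changes that quantity, under that assumption (values and shapes are made up for illustration):

```python
import numpy

# Toy values, assumed for illustration only. Per the comment above, `grad`
# is taken to already hold gradient * embedding elementwise, computed
# earlier in the interpreter.
gradient = numpy.array([[0.7, -0.6, 0.1]])   # one token, 3-dim embedding
embedding = numpy.array([[1.0, 1.0, 1.0]])
grad = gradient * embedding

signed_dot = numpy.sum(grad, axis=1)             # [0.2]: completes g . e
abs_sum = numpy.sum(numpy.abs(grad), axis=-1)    # [1.4]: sum of |g_i * e_i|

# The signed sum lets positive and negative components cancel; the absolute
# sum accumulates them, so the two scores can rank tokens differently once
# they are L1-normalized across the sequence.
print(signed_dot, abs_sum)
```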
8 changes: 3 additions & 5 deletions allennlp/interpret/saliency_interpreters/simple_gradient.py
@@ -1,5 +1,3 @@
- import math
-
from typing import List
import numpy
import torch
@@ -48,9 +46,9 @@ def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
# gradient and its respective embedding.
input_idx = int(key[-1]) - 1
# The [0] here is undo-ing the batching that happens in get_gradients.
- emb_grad = numpy.sum(grad[0] * embeddings_list[input_idx][0], axis=1)
Contributor comment:
This line is computing a dot product between the gradient vector and the embedding vector. The original implementation is correct. The proposed change is not a dot product anymore.

- norm = numpy.linalg.norm(emb_grad, ord=1)
- normalized_grad = [math.fabs(e) / norm for e in emb_grad]
+ emb_grad = numpy.sum(numpy.abs(grad[0] * embeddings_list[input_idx][0]), axis=-1)
+ norm = numpy.linalg.norm(emb_grad, ord=1, keepdims=True)
+ normalized_grad = emb_grad / norm
grads[key] = normalized_grad

instances_with_grads["instance_" + str(idx + 1)] = grads
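The reviewer's objection for simple gradients can be seen numerically: the old code takes the absolute value after the per-token dot product, the new code before it. A small sketch with made-up gradients and embeddings (not AllenNLP code):

```python
import math
import numpy

# Toy values, assumed for illustration: gradients and embeddings for two
# tokens with a 2-dim embedding.
g = numpy.array([[1.0, -1.0], [0.5, 0.5]])
e = numpy.array([[1.0, 1.0], [1.0, 1.0]])

# Old: per-token dot product, absolute value taken only after the sum.
old = numpy.sum(g * e, axis=1)                                   # [0.0, 1.0]
old_scores = [math.fabs(v) / numpy.linalg.norm(old, ord=1) for v in old]
# -> [0.0, 1.0]: the first token's components cancel entirely.

# New: absolute value taken before the sum, so cancellation is ignored.
new = numpy.sum(numpy.abs(g * e), axis=-1)                       # [2.0, 1.0]
new_scores = new / numpy.linalg.norm(new, ord=1, keepdims=True)
# -> [0.667, 0.333]: the first token now gets the larger saliency score.

print(old_scores, new_scores)
```

Which behavior is desirable is the substance of the review discussion; the sketch only shows that the two formulas are not equivalent.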
7 changes: 3 additions & 4 deletions allennlp/interpret/saliency_interpreters/smooth_gradient.py
@@ -1,4 +1,3 @@
- import math
from typing import Dict, Any

import numpy
@@ -39,9 +38,9 @@ def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
# Fine for now, but should fix for consistency.

# The [0] here is undo-ing the batching that happens in get_gradients.
- embedding_grad = numpy.sum(grad[0], axis=1)
- norm = numpy.linalg.norm(embedding_grad, ord=1)
- normalized_grad = [math.fabs(e) / norm for e in embedding_grad]
+ embedding_grad = numpy.sum(numpy.abs(grad[0]), axis=-1)
+ norm = numpy.linalg.norm(embedding_grad, ord=1, keepdims=True)
+ normalized_grad = embedding_grad / norm
grads[key] = normalized_grad

instances_with_grads["instance_" + str(idx + 1)] = grads