PaddlePaddle · zhangyimi · Sep 19, 2022 · Sep 14, 2022
diff --git a/setup.cfg b/setup.cfg
@@ -9,7 +9,7 @@ author_email = nlp-parser@baidu.com
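-# Project version, versions only above than 1.0 will assumed as a released version.
-# When modifying project version to above than 1.0, here's the rules should be followed.
+# Project version; only versions above 1.0 are assumed to be released versions.
+# When raising the project version above 1.0, the following rules should be followed.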
 # http://wiki.baidu.com/pages/viewpage.action?pageId=469686381
-version = 0.1.5
+version = 0.1.6
 # A brief introduction about the project, ANY NON-ENGLISH CHARACTER IS NOT SUPPORTED!
 description = baidu TrustAI
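-# A longer version of introduction abouth the project, you can also include readme, change log, etc. .md or rst file is recommended.
+# A longer introduction to the project; you can also include the readme, change log, etc. A .md or .rst file is recommended.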

diff --git a/trustai/__init__.py b/trustai/__init__.py
@@ -13,4 +13,4 @@
 # limitations under the License.
 """TrustAI"""
 
-__version__ = "0.1.5"
+__version__ = "0.1.6"
diff --git a/trustai/interpretation/example_level/common/utils.py b/trustai/interpretation/example_level/common/utils.py
@@ -38,11 +38,37 @@ def dot_similarity(inputs_a, inputs_b):
     return paddle.sum(inputs_a * inputs_b, axis=1)
 
 
-def cos_similarity(inputs_a, inputs_b):
+def cos_similarity(inputs_a, inputs_b, step=500000):
     """
-    calaculate cosine similarity between the two inputs.
+    Calculate cosine similarity between the two inputs.
+
+    `inputs_a` is processed in slices of at most `step` rows along its first
+    axis and the per-slice results are concatenated along axis 0.
+
+    Args:
+        inputs_a: tensor that is sliced along its first axis.
+        inputs_b: tensor compared against every slice; a leading axis is
+            added with `unsqueeze(0)` before the comparison.
+        step: maximum number of rows of `inputs_a` per call; must be positive.
+
+    Raises:
+        ValueError: if `step` is not positive.
     """
-    return F.cosine_similarity(inputs_a, inputs_b.unsqueeze(0))
+    if step <= 0:
+        # A non-positive step would never advance through `inputs_a`.
+        raise ValueError("step must be a positive integer")
+    inputs_b = inputs_b.unsqueeze(0)
+    num_rows = inputs_a.shape[0]
+    if num_rows <= step:
+        # One call suffices; this also covers empty input, where there would
+        # be no slices to concatenate.
+        return F.cosine_similarity(inputs_a, inputs_b)
+    # Slicing works around a paddle bug (presumably hit on very large inputs).
+    res = [
+        F.cosine_similarity(inputs_a[start:start + step], inputs_b)
+        for start in range(0, num_rows, step)
+    ]
+    return paddle.concat(res, axis=0)
 
 
 def euc_similarity(inputs_a, inputs_b):

diff --git a/trustai/interpretation/example_level/method/feature_similarity.py b/trustai/interpretation/example_level/method/feature_similarity.py
@@ -3,6 +3,8 @@
 feature-based similarity method.
 cosine, cot and euc.
 """
+import os
+import sys
 import functools
 import warnings
 
@@ -25,6 +27,7 @@ def __init__(
         device=None,
         classifier_layer_name="classifier",
         predict_fn=None,
+        cached_train_feature=None,
     ):
         """
         Initialization.
@@ -38,9 +41,18 @@ def __init__(
         ExampleBaseInterpreter.__init__(self, paddle_model, device, predict_fn, classifier_layer_name)
         self.paddle_model = paddle_model
         self.classifier_layer_name = classifier_layer_name
-        self.train_feature, _ = self.extract_feature_from_dataloader(train_dataloader)
 
-
+        if cached_train_feature is not None and os.path.isfile(cached_train_feature):
+            self.train_feature = paddle.load(cached_train_feature)
+        else:
+            self.train_feature, _ = self.extract_feature_from_dataloader(train_dataloader)
+            if cached_train_feature is not None:
+                try:
+                    paddle.save(self.train_feature, cached_train_feature)
+                except IOError:
+                    import sys
+                    sys.stderr.write("save cached_train_feature fail")
+
     def interpret(self, data, sample_num=3, sim_fn="cos"):
         """
         Select most similar and dissimilar examples for a given data using the `sim_fn` metric.
@@ -87,7 +99,7 @@ def extract_feature_from_dataloader(self, dataloader):
         """
         print("Extracting feature from given dataloader, it will take some time...")
         features, preds = [], []
-        
+
         for batch in dataloader:
             feature, pred = self.extract_feature(self.paddle_model, batch)
             features.append(feature)