diff --git a/setup.cfg b/setup.cfg index a32e481..451a8be 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,7 +9,7 @@ author_email = nlp-parser@baidu.com # Project version, versions only above than 1.0 will assumed as a released version. # When modifying project version to above than 1.0, here's the rules should be followed. # http://wiki.baidu.com/pages/viewpage.action?pageId=469686381 -version = 0.1.5 +version = 0.1.6 # A brief introduction about the project, ANY NON-ENGLISH CHARACTER IS NOT SUPPORTED! description = baidu TrustAI # A longer version of introduction abouth the project, you can also include readme, change log, etc. .md or rst file is recommended. diff --git a/trustai/__init__.py b/trustai/__init__.py index 79bc5ce..2d5b7b4 100644 --- a/trustai/__init__.py +++ b/trustai/__init__.py @@ -13,4 +13,4 @@ # limitations under the License. """TrustAI""" -__version__ = "0.1.5" \ No newline at end of file +__version__ = "0.1.6" \ No newline at end of file diff --git a/trustai/interpretation/example_level/common/utils.py b/trustai/interpretation/example_level/common/utils.py index 66dd1cf..e81ba6f 100644 --- a/trustai/interpretation/example_level/common/utils.py +++ b/trustai/interpretation/example_level/common/utils.py @@ -38,11 +38,18 @@ def dot_similarity(inputs_a, inputs_b): return paddle.sum(inputs_a * inputs_b, axis=1) -def cos_similarity(inputs_a, inputs_b): +def cos_similarity(inputs_a, inputs_b, step=500000): """ calaculate cosine similarity between the two inputs. """ - return F.cosine_similarity(inputs_a, inputs_b.unsqueeze(0)) + # Processing to avoid paddle bug + start, end = 0, step + res = [] + while start < inputs_a.shape[0]: + res.append(F.cosine_similarity(inputs_a[start:end], inputs_b.unsqueeze(0))) + start = end + end = end + step + return paddle.concat(res, axis=0) def euc_similarity(inputs_a, inputs_b): diff --git a/trustai/interpretation/example_level/method/feature_similarity.py b/trustai/interpretation/example_level/method/feature_similarity.py index 0f72dfe..269257f 100644 --- a/trustai/interpretation/example_level/method/feature_similarity.py +++ b/trustai/interpretation/example_level/method/feature_similarity.py @@ -3,6 +3,8 @@ feature-based similarity method. cosine, cot and euc. """ +import os +import sys import functools import warnings @@ -25,6 +27,7 @@ def __init__( device=None, classifier_layer_name="classifier", predict_fn=None, + cached_train_feature=None, ): """ Initialization. @@ -38,9 +41,18 @@ def __init__( ExampleBaseInterpreter.__init__(self, paddle_model, device, predict_fn, classifier_layer_name) self.paddle_model = paddle_model self.classifier_layer_name = classifier_layer_name - self.train_feature, _ = self.extract_feature_from_dataloader(train_dataloader) - + if cached_train_feature is not None and os.path.isfile(cached_train_feature): + self.train_feature = paddle.load(cached_train_feature) + else: + self.train_feature, _ = self.extract_feature_from_dataloader(train_dataloader) + if cached_train_feature is not None: + try: + paddle.save(self.train_feature, cached_train_feature) + except IOError: + import sys + sys.stderr.write("save cached_train_feature fail") + def interpret(self, data, sample_num=3, sim_fn="cos"): """ Select most similar and dissimilar examples for a given data using the `sim_fn` metric. @@ -87,7 +99,7 @@ def extract_feature_from_dataloader(self, dataloader): """ print("Extracting feature from given dataloader, it will take some time...") features, preds = [], [] - + for batch in dataloader: feature, pred = self.extract_feature(self.paddle_model, batch) features.append(feature)