huggingface · lvwerra · Aug 24, 2022 · Aug 23, 2022 · Aug 23, 2022 · Aug 23, 2022
diff --git a/src/evaluate/evaluator/image_classification.py b/src/evaluate/evaluator/image_classification.py
@@ -50,6 +50,8 @@ class ImageClassificationEvaluator(Evaluator):
     Methods in this class assume a data format compatible with the [`ImageClassificationPipeline`].
     """
 
+    PIPELINE_KWARGS = {}
+
     def __init__(self, task="image-classification", default_metric_name=None):
         super().__init__(task, default_metric_name=default_metric_name)
 

diff --git a/src/evaluate/evaluator/question_answering.py b/src/evaluate/evaluator/question_answering.py
@@ -84,7 +84,7 @@ class QuestionAnsweringEvaluator(Evaluator):
     [`QuestionAnsweringPipeline`](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.QuestionAnsweringPipeline).
     """
 
-    PIPELINE_KWARGS = {"handle_impossible_answer": False}
+    PIPELINE_KWARGS = {}
 
     def __init__(self, task="question-answering", default_metric_name=None):
         super().__init__(task, default_metric_name=default_metric_name)
@@ -196,7 +196,7 @@ def compute(
 
         if squad_v2_format is None:
             squad_v2_format = self.is_squad_v2_format(data=data, label_column=label_column)
-            logger.warn(
+            logger.warning(
                 f"`squad_v2_format` parameter not provided to QuestionAnsweringEvaluator.compute(). Automatically inferred `squad_v2_format` as {squad_v2_format}."
             )
 
@@ -205,16 +205,18 @@ def compute(
         metric = self.prepare_metric(metric)
 
         if squad_v2_format and metric.name == "squad":
-            logger.warn(
+            logger.warning(
                 "The dataset has SQuAD v2 format but you are using the SQuAD metric. Consider passing the 'squad_v2' metric."
             )
         if not squad_v2_format and metric.name == "squad_v2":
-            logger.warn(
+            logger.warning(
                 "The dataset has SQuAD v1 format but you are using the SQuAD v2 metric. Consider passing the 'squad' metric."
             )
 
         if squad_v2_format:
             self.PIPELINE_KWARGS["handle_impossible_answer"] = True
+        else:
+            self.PIPELINE_KWARGS["handle_impossible_answer"] = False
 
         # Compute predictions
         predictions, perf_results = self.call_pipeline(pipe, **pipe_inputs)

diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py
@@ -455,7 +455,7 @@ def test_model_init(self):
             metric="squad",
         )
         self.assertEqual(results["exact_match"], 0)
-        self.assertEqual(results["f1"], 0)
+        self.assertEqual(results["f1"], 100 / 3)
 
         model = AutoModelForQuestionAnswering.from_pretrained(self.default_model)
         tokenizer = AutoTokenizer.from_pretrained(self.default_model)
@@ -466,7 +466,7 @@ def test_model_init(self):
             tokenizer=tokenizer,
         )
         self.assertEqual(results["exact_match"], 0)
-        self.assertEqual(results["f1"], 0)
+        self.assertEqual(results["f1"], 100 / 3)
 
     def test_class_init(self):
         # squad_v1-like dataset