Question Answering improvements - NQ3 #419

Merged: 47 commits, Jul 3, 2020

Changes from 11 commits

Commits (47)
b8e553d
unify squad and nq baskets
brandenchan Jun 16, 2020
39bebc2
attempt at simplifying ids
brandenchan Jun 17, 2020
eb6834a
clean id handling
brandenchan Jun 17, 2020
24b5747
Better handling of different input dicts
brandenchan Jun 18, 2020
08b4d33
Merge branch 'master' into nq_2
brandenchan Jun 18, 2020
c41ed34
apply_tokenization merged
brandenchan Jun 18, 2020
2508f8c
clean apply_tokenization
brandenchan Jun 18, 2020
04f9d7b
Merge branch 'master' into nq_2
brandenchan Jun 22, 2020
e7b4e41
Rename samples to passages
brandenchan Jun 22, 2020
c13a890
Merge branch 'master' into nq_2
brandenchan Jun 22, 2020
56a7127
Clean id handling
brandenchan Jun 22, 2020
c33ce4d
rename is_impossible to no_answer
brandenchan Jun 22, 2020
55e0ad2
Rename preds_p to preds
brandenchan Jun 22, 2020
1308712
Add QAInference type hints
brandenchan Jun 22, 2020
61c5d7b
Adjust examples to new changes
brandenchan Jun 22, 2020
335b087
Fix type hint error
brandenchan Jun 22, 2020
abb4130
Check that label character index matches label str
brandenchan Jun 23, 2020
cb893b7
Minor improvements
brandenchan Jun 23, 2020
1b9e641
Enforce single label doc cls in preprocessing
brandenchan Jun 24, 2020
27e12ee
Refactor span_to_string, clean predictions objects
brandenchan Jun 24, 2020
61bc193
Remove unneccessary iteration
brandenchan Jun 24, 2020
acf8358
WIP clean and document predictions.py
brandenchan Jun 24, 2020
102763f
Add documentation of Pred objects
brandenchan Jun 25, 2020
18fb8bb
Merge branch 'master' into more_improvements
brandenchan Jun 25, 2020
e3d4bb6
Fix list index bug
brandenchan Jun 25, 2020
e619e2e
Merge branch 'more_improvements' of https://github.com/deepset-ai/FAR…
brandenchan Jun 25, 2020
aa3333c
Fix index in test sample
brandenchan Jun 25, 2020
1af739f
Refactor data check
brandenchan Jun 25, 2020
0d09698
Fix docstring
brandenchan Jun 25, 2020
0725e37
Simplify QA generate_labels()
brandenchan Jun 30, 2020
35c67f0
Rename internal methods
brandenchan Jun 30, 2020
28a66e7
update docstring
brandenchan Jun 30, 2020
651f8af
add input_features test
brandenchan Jul 1, 2020
2867a75
Add docstring
brandenchan Jul 1, 2020
605bf5b
Merge branch 'master' into more_improvements
brandenchan Jul 1, 2020
3446ff8
Fix import and error handling
brandenchan Jul 1, 2020
bb51fc9
Merge branch 'more_improvements' of https://github.com/deepset-ai/FAR…
brandenchan Jul 1, 2020
71738d2
Fix answer check
brandenchan Jul 1, 2020
41ec428
Fix sample check
brandenchan Jul 1, 2020
74c6e9c
move sample check to _sample_to_features
brandenchan Jul 1, 2020
171db3e
Pass QA inferencer args properly
brandenchan Jul 2, 2020
b4825f0
Rename span to qa_candidate
brandenchan Jul 2, 2020
fce94c7
Arg passing error causing Eval bug
brandenchan Jul 2, 2020
07b84e6
Fix bug in answer check
brandenchan Jul 2, 2020
473bf63
remove import
brandenchan Jul 2, 2020
ea76400
Remove reference to SampleError
brandenchan Jul 2, 2020
7d16016
Fix onnx sample
brandenchan Jul 2, 2020
11 changes: 5 additions & 6 deletions farm/data_handler/input_features.py
@@ -410,7 +410,7 @@ def generate_labels(answers, passage_len_t, question_len_t, tokenizer, max_answe
["no_answer", "yes", "no", "span"] and this is what answer_type_list should look like"""

label_idxs = np.full((max_answers, 2), fill_value=-1)
answer_types = np.full((max_answers), fill_value=-1)
answer_types = np.full((1), fill_value=-1)
Review comment (Contributor): a single scalar value needs init?

# If there are no answers
if len(answers) == 0:
@@ -419,7 +419,6 @@ def generate_labels(answers, passage_len_t, question_len_t, tokenizer, max_answe
return label_idxs, answer_types

for i, answer in enumerate(answers):
answer_type = answer["answer_type"]
start_idx = answer["start_t"]
end_idx = answer["end_t"]

@@ -471,10 +470,10 @@ def generate_labels(answers, passage_len_t, question_len_t, tokenizer, max_answe
label_idxs[i, 0] = start_idx
label_idxs[i, 1] = end_idx

# Only Natural Questions trains a classification head on answer_type, SQuAD only has the QA head. answer_type_list
# will be None for SQuAD but something like ["no_answer", "span", "yes", "no"] for Natural Questions
if answer_type_list:
answer_types[i] = answer_type_list.index(answer_type)
# Only Natural Questions trains a classification head on answer_type, SQuAD only has the QA head. answer_type_list
# will be None for SQuAD but something like ["no_answer", "span", "yes", "no"] for Natural Questions
if answer_type_list:
answer_types[0] = answer_type_list.index(answers[0]["answer_type"])

assert np.max(label_idxs) > -1

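Note on the change above: answer_type is now a single document-level label rather than one entry per answer. A minimal sketch of the new labeling behaviour, with illustrative values only (not the full generate_labels implementation):

import numpy as np

answer_type_list = ["no_answer", "span", "yes", "no"]            # None for SQuAD
answers = [{"answer_type": "span", "start_t": 12, "end_t": 15}]  # illustrative

answer_types = np.full((1), fill_value=-1)                       # -1 until a label is assigned
if answer_type_list:
    answer_types[0] = answer_type_list.index(answers[0]["answer_type"])
# answer_types -> array([1]); it stays [-1] for SQuAD, where answer_type_list is None
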
7 changes: 4 additions & 3 deletions farm/data_handler/processor.py
@@ -1148,7 +1148,7 @@ def file_to_dicts(self, file: str) -> [dict]:
nested_dicts = read_squad_file(filename=file)
dicts = [y for x in nested_dicts for y in x["paragraphs"]]
for d in dicts:
assert valid_answer(d)
check_valid_answer(d)
return dicts

def _dict_to_samples(self, dictionary: dict, **kwargs) -> [Sample]:
@@ -1680,7 +1680,8 @@ def is_impossible_to_answer_type(qas):
new_qas.append(q)
return new_qas

def valid_answer(dictionary):

def check_valid_answer(dictionary):
context = dictionary["context"]
for qa in dictionary["qas"]:
for answer in qa["answers"]:
@@ -1690,4 +1691,4 @@ def valid_answer(dictionary):
if context[start: end] != answer["text"]:
raise Exception(f"The answer extracted by start character index does not match the answer string: "
f"\t {context[start: end]} vs {answer['text']}")
return True

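The renamed check_valid_answer raises on a mismatch instead of returning a bool, which is why the assert in file_to_dicts was dropped. A hypothetical SQuAD-style paragraph dict for illustration (the start/end computation happens in lines not shown in this diff):

doc = {
    "context": "FARM is a framework for transfer learning.",
    "qas": [{
        "question": "What is FARM?",
        "answers": [{"text": "a framework for transfer learning", "answer_start": 8}],
    }],
}
check_valid_answer(doc)  # raises Exception if the character span does not match the answer text
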
72 changes: 31 additions & 41 deletions farm/modeling/prediction_head.py
@@ -1,22 +1,19 @@
import itertools
import json
import logging
import os
import numpy as np
import pandas as pd
from scipy.special import expit, softmax
import tqdm

from pathlib import Path
import torch
from transformers.modeling_bert import BertForPreTraining, BertLayerNorm, ACT2FN
from transformers.modeling_auto import AutoModelForQuestionAnswering, AutoModelForTokenClassification, AutoModelForSequenceClassification
from transformers.configuration_auto import AutoConfig
from typing import List

from torch import nn
from torch.nn import CrossEntropyLoss, MSELoss, BCEWithLogitsLoss

from farm.data_handler.utils import is_json
from farm.utils import convert_iob_to_simple_tags, span_to_string, try_get
from farm.utils import convert_iob_to_simple_tags, try_get
from farm.modeling.predictions import QACandidate, QAPred

logger = logging.getLogger(__name__)
@@ -335,9 +332,7 @@ def forward(self, X):

def logits_to_loss(self, logits, **kwargs):
label_ids = kwargs.get(self.label_tensor_name)
# In Natural Questions, each dev sample can have multiple labels
# For loss calculation we only use the first label
label_ids = label_ids.narrow(1,0,1)
label_ids = label_ids
return self.loss_fct(logits, label_ids.view(-1))

def logits_to_probs(self, logits, return_class_probs, **kwargs):
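
For context on the removed narrow(1, 0, 1): it selected only the first label column per sample because Natural Questions dev samples could carry multiple labels. It is dropped here, presumably because labels are now reduced to a single answer_type during preprocessing (see the input_features.py change above). A small illustration with a made-up label tensor:

import torch

label_ids = torch.tensor([[1, 3], [0, 0]])  # old case: several labels per sample
first_only = label_ids.narrow(1, 0, 1)      # dim 1, start 0, length 1 -> tensor([[1], [0]])
# With a single label per sample, narrowing is no longer needed before label_ids.view(-1).
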
@@ -1232,8 +1227,9 @@ def to_qa_preds(self, top_preds, no_ans_gaps, baskets):
# Iterate over each set of document level prediction
for pred_d, no_ans_gap, basket in zip(top_preds, no_ans_gaps, baskets):

# Unpack document offsets, clear text and squad_id
# Unpack document offsets, clear text and id
token_offsets = basket.samples[0].tokenized["document_offsets"]
pred_id = basket.id_external if basket.id_external else basket.id_internal

# These options reflect the different input dicts that can be assigned to the basket
# before any kind of normalization or preprocessing can happen
@@ -1245,29 +1241,23 @@ def to_qa_preds(self, top_preds, no_ans_gaps, baskets):

# Iterate over each prediction on the one document
full_preds = []
for qa_answer in pred_d:
# This should be a method of Span
pred_str, _, _ = span_to_string(qa_answer.offset_answer_start,
qa_answer.offset_answer_end,
token_offsets,
document_text)
qa_answer.add_answer(pred_str)
full_preds.append(qa_answer)
for qa_candidate in pred_d:
pred_str, _, _ = qa_candidate.span_to_string(token_offsets, document_text)
qa_candidate.add_answer(pred_str)
full_preds.append(qa_candidate)
n_samples = full_preds[0].n_passages_in_doc

pred_id = basket.id_external if basket.id_external else basket.id_internal

curr_doc_pred = QAPred(id=pred_id,

prediction=full_preds,
context=document_text,
question=question,
token_offsets=token_offsets,
context_window_size=self.context_window_size,
aggregation_level="document",
answer_types=[], # TODO
no_answer_gap=no_ans_gap,
n_passages=n_samples
)
n_passages=n_samples)

ret.append(curr_doc_pred)
return ret

@@ -1367,15 +1357,15 @@ def reduce_preds(self, preds):
# Get all predictions in flattened list and sort by score
pos_answers_flat = []
for sample_idx, passage_preds in enumerate(preds):
for qa_answer in passage_preds:
if not (qa_answer.offset_answer_start == -1 and qa_answer.offset_answer_end == -1):
pos_answers_flat.append(QACandidate(offset_answer_start=qa_answer.offset_answer_start,
offset_answer_end=qa_answer.offset_answer_end,
score=qa_answer.score,
answer_type=qa_answer.answer_type,
for qa_candidate in passage_preds:
if not (qa_candidate.offset_answer_start == -1 and qa_candidate.offset_answer_end == -1):
pos_answers_flat.append(QACandidate(offset_answer_start=qa_candidate.offset_answer_start,
offset_answer_end=qa_candidate.offset_answer_end,
score=qa_candidate.score,
answer_type=qa_candidate.answer_type,
offset_unit="token",
aggregation_level="passage",
passage_id=sample_idx,
passage_id=str(sample_idx),
n_passages_in_doc=n_samples)
)

Expand All @@ -1386,7 +1376,7 @@ def reduce_preds(self, preds):
no_ans_gap = -min([nas - pbs for nas, pbs in zip(no_answer_scores, passage_best_score)])

# "no answer" scores and positive answers scores are difficult to compare, because
# + a positive answer score is related to a specific text qa_answer
# + a positive answer score is related to a specific text qa_candidate
# - a "no answer" score is related to all input texts
# Thus we compute the "no answer" score relative to the best possible answer and adjust it by
# the most significant difference between scores.
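
A worked example of the gap computation above, with made-up scores for a document split into two passages:

no_answer_scores   = [4.0, 6.0]   # "no answer" score per passage
passage_best_score = [5.5, 3.0]   # best positive answer score per passage

no_ans_gap = -min([nas - pbs for nas, pbs in zip(no_answer_scores, passage_best_score)])
# differences: [-1.5, 3.0]; min is -1.5; no_ans_gap = 1.5
# A positive gap means the best positive answer outscores "no answer" in at least one passage.
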
@@ -1518,20 +1508,20 @@ def chunk(iterable, lengths):
samples_per_doc = [doc_pred.n_passages for doc_pred in preds_all[0][0]]
cls_preds_grouped = chunk(cls_preds, samples_per_doc)

for qa_doc_pred, cls_preds in zip(qa_preds, cls_preds_grouped):
pred_qa_answers = qa_doc_pred.prediction
pred_qa_answers_new = []
for pred_qa_answer in pred_qa_answers:
passage_id = pred_qa_answer.passage_id
for qa_pred, cls_preds in zip(qa_preds, cls_preds_grouped):
qa_candidates = qa_pred.prediction
qa_candidates_new = []
for qa_candidate in qa_candidates:
passage_id = qa_candidate.passage_id
if passage_id is not None:
cls_pred = cls_preds[passage_id]["label"]
cls_pred = cls_preds[int(passage_id)]["label"]
# i.e. if no_answer
else:
cls_pred = "no_answer"
pred_qa_answer.add_cls(cls_pred)
pred_qa_answers_new.append(pred_qa_answer)
qa_doc_pred.prediction = pred_qa_answers_new
ret.append(qa_doc_pred)
qa_candidate.add_cls(cls_pred)
qa_candidates_new.append(qa_candidate)
qa_pred.prediction = qa_candidates_new
ret.append(qa_pred)
return ret


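To illustrate the str passage_id introduced above: each QACandidate now stores its passage index as a string, so the lookup into the per-passage document-classification predictions converts it back to int, while a passage_id of None marks the document-level no-answer candidate. A tiny sketch with made-up values:

doc_cls_preds = [{"label": "span"}, {"label": "no_answer"}]  # one entry per passage of one document

passage_id = "1"                                     # QACandidate.passage_id is now a str
cls_pred = doc_cls_preds[int(passage_id)]["label"]   # -> "no_answer"

passage_id = None                                    # document-level no-answer candidate
cls_pred = "no_answer"                               # assigned directly, no lookup needed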