Question Answering improvements - NQ3 #419

Merged: 47 commits, Jul 3, 2020

Changes from 11 commits

Commits (47)
b8e553d
unify squad and nq baskets
brandenchan Jun 16, 2020
39bebc2
attempt at simplifying ids
brandenchan Jun 17, 2020
eb6834a
clean id handling
brandenchan Jun 17, 2020
24b5747
Better handling of different input dicts
brandenchan Jun 18, 2020
08b4d33
Merge branch 'master' into nq_2
brandenchan Jun 18, 2020
c41ed34
apply_tokenization merged
brandenchan Jun 18, 2020
2508f8c
clean apply_tokenization
brandenchan Jun 18, 2020
04f9d7b
Merge branch 'master' into nq_2
brandenchan Jun 22, 2020
e7b4e41
Rename samples to passages
brandenchan Jun 22, 2020
c13a890
Merge branch 'master' into nq_2
brandenchan Jun 22, 2020
56a7127
Clean id handling
brandenchan Jun 22, 2020
c33ce4d
rename is_impossible to no_answer
brandenchan Jun 22, 2020
55e0ad2
Rename preds_p to preds
brandenchan Jun 22, 2020
1308712
Add QAInference type hints
brandenchan Jun 22, 2020
61c5d7b
Adjust examples to new changes
brandenchan Jun 22, 2020
335b087
Fix type hint error
brandenchan Jun 22, 2020
abb4130
Check that label character index matches label str
brandenchan Jun 23, 2020
cb893b7
Minor improvements
brandenchan Jun 23, 2020
1b9e641
Enforce single label doc cls in preprocessing
brandenchan Jun 24, 2020
27e12ee
Refactor span_to_string, clean predictions objects
brandenchan Jun 24, 2020
61bc193
Remove unneccessary iteration
brandenchan Jun 24, 2020
acf8358
WIP clean and document predictions.py
brandenchan Jun 24, 2020
102763f
Add documentation of Pred objects
brandenchan Jun 25, 2020
18fb8bb
Merge branch 'master' into more_improvements
brandenchan Jun 25, 2020
e3d4bb6
Fix list index bug
brandenchan Jun 25, 2020
e619e2e
Merge branch 'more_improvements' of https://github.com/deepset-ai/FAR…
brandenchan Jun 25, 2020
aa3333c
Fix index in test sample
brandenchan Jun 25, 2020
1af739f
Refactor data check
brandenchan Jun 25, 2020
0d09698
Fix docstring
brandenchan Jun 25, 2020
0725e37
Simplify QA generate_labels()
brandenchan Jun 30, 2020
35c67f0
Rename internal methods
brandenchan Jun 30, 2020
28a66e7
update docstring
brandenchan Jun 30, 2020
651f8af
add input_features test
brandenchan Jul 1, 2020
2867a75
Add docstring
brandenchan Jul 1, 2020
605bf5b
Merge branch 'master' into more_improvements
brandenchan Jul 1, 2020
3446ff8
Fix import and error handling
brandenchan Jul 1, 2020
bb51fc9
Merge branch 'more_improvements' of https://github.com/deepset-ai/FAR…
brandenchan Jul 1, 2020
71738d2
Fix answer check
brandenchan Jul 1, 2020
41ec428
Fix sample check
brandenchan Jul 1, 2020
74c6e9c
move sample check to _sample_to_features
brandenchan Jul 1, 2020
171db3e
Pass QA inferencer args properly
brandenchan Jul 2, 2020
b4825f0
Rename span to qa_candidate
brandenchan Jul 2, 2020
fce94c7
Arg passing error causing Eval bug
brandenchan Jul 2, 2020
07b84e6
Fix bug in answer check
brandenchan Jul 2, 2020
473bf63
remove import
brandenchan Jul 2, 2020
ea76400
Remove reference to SampleError
brandenchan Jul 2, 2020
7d16016
Fix onnx sample
brandenchan Jul 2, 2020
11 changes: 5 additions & 6 deletions farm/data_handler/input_features.py
@@ -410,7 +410,7 @@ def generate_labels(answers, passage_len_t, question_len_t, tokenizer, max_answe
["no_answer", "yes", "no", "span"] and this is what answer_type_list should look like"""

label_idxs = np.full((max_answers, 2), fill_value=-1)
answer_types = np.full((max_answers), fill_value=-1)
answer_types = np.full((1), fill_value=-1)
Review comment (Contributor): a single scalar value needs init?

# If there are no answers
if len(answers) == 0:
@@ -419,7 +419,6 @@ def generate_labels(answers, passage_len_t, question_len_t, tokenizer, max_answe
return label_idxs, answer_types

for i, answer in enumerate(answers):
answer_type = answer["answer_type"]
start_idx = answer["start_t"]
end_idx = answer["end_t"]

@@ -471,10 +470,10 @@ def generate_labels(answers, passage_len_t, question_len_t, tokenizer, max_answe
label_idxs[i, 0] = start_idx
label_idxs[i, 1] = end_idx

# Only Natural Questions trains a classification head on answer_type, SQuAD only has the QA head. answer_type_list
# will be None for SQuAD but something like ["no_answer", "span", "yes", "no"] for Natural Questions
if answer_type_list:
answer_types[i] = answer_type_list.index(answer_type)
# Only Natural Questions trains a classification head on answer_type, SQuAD only has the QA head. answer_type_list
# will be None for SQuAD but something like ["no_answer", "span", "yes", "no"] for Natural Questions
if answer_type_list:
answer_types[0] = answer_type_list.index(answers[0]["answer_type"])

assert np.max(label_idxs) > -1

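Note on the change above: answer_type is now a single document-level label rather than one entry per answer. A minimal sketch of the new labeling behaviour, with illustrative values only (not the full generate_labels implementation):

import numpy as np

answer_type_list = ["no_answer", "span", "yes", "no"]            # None for SQuAD
answers = [{"answer_type": "span", "start_t": 12, "end_t": 15}]  # illustrative

answer_types = np.full((1), fill_value=-1)                       # -1 until a label is assigned
if answer_type_list:
    answer_types[0] = answer_type_list.index(answers[0]["answer_type"])
# answer_types -> array([1]); it stays [-1] for SQuAD, where answer_type_list is None
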
7 changes: 4 additions & 3 deletions farm/data_handler/processor.py
@@ -1148,7 +1148,7 @@ def file_to_dicts(self, file: str) -> [dict]:
nested_dicts = read_squad_file(filename=file)
dicts = [y for x in nested_dicts for y in x["paragraphs"]]
for d in dicts:
assert valid_answer(d)
check_valid_answer(d)
return dicts

def _dict_to_samples(self, dictionary: dict, **kwargs) -> [Sample]:
@@ -1680,7 +1680,8 @@ def is_impossible_to_answer_type(qas):
new_qas.append(q)
return new_qas

def valid_answer(dictionary):

def check_valid_answer(dictionary):
context = dictionary["context"]
for qa in dictionary["qas"]:
for answer in qa["answers"]:
@@ -1690,4 +1691,4 @@ def valid_answer(dictionary):
if context[start: end] != answer["text"]:
raise Exception(f"The answer extracted by start character index does not match the answer string: "
f"\t {context[start: end]} vs {answer['text']}")
return True

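The renamed check_valid_answer raises on a mismatch instead of returning a bool, which is why the assert in file_to_dicts was dropped. A hypothetical SQuAD-style paragraph dict for illustration (the start/end computation happens in lines not shown in this diff):

doc = {
    "context": "FARM is a framework for transfer learning.",
    "qas": [{
        "question": "What is FARM?",
        "answers": [{"text": "a framework for transfer learning", "answer_start": 8}],
    }],
}
check_valid_answer(doc)  # raises Exception if the character span does not match the answer text
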
72 changes: 31 additions & 41 deletions farm/modeling/prediction_head.py
@@ -1,22 +1,19 @@
import itertools
import json
import logging
import os
import numpy as np
import pandas as pd
from scipy.special import expit, softmax
import tqdm

from pathlib import Path
import torch
from transformers.modeling_bert import BertForPreTraining, BertLayerNorm, ACT2FN
from transformers.modeling_auto import AutoModelForQuestionAnswering, AutoModelForTokenClassification, AutoModelForSequenceClassification
from transformers.configuration_auto import AutoConfig
from typing import List

from torch import nn
from torch.nn import CrossEntropyLoss, MSELoss, BCEWithLogitsLoss

from farm.data_handler.utils import is_json
from farm.utils import convert_iob_to_simple_tags, span_to_string, try_get
from farm.utils import convert_iob_to_simple_tags, try_get
from farm.modeling.predictions import QACandidate, QAPred

logger = logging.getLogger(__name__)
@@ -335,9 +332,7 @@ def forward(self, X):

def logits_to_loss(self, logits, **kwargs):
label_ids = kwargs.get(self.label_tensor_name)
# In Natural Questions, each dev sample can have multiple labels
# For loss calculation we only use the first label
label_ids = label_ids.narrow(1,0,1)
label_ids = label_ids
return self.loss_fct(logits, label_ids.view(-1))

def logits_to_probs(self, logits, return_class_probs, **kwargs):
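
For context on the removed narrow(1, 0, 1): it selected only the first label column per sample because Natural Questions dev samples could carry multiple labels. It is dropped here, presumably because labels are now reduced to a single answer_type during preprocessing (see the input_features.py change above). A small illustration with a made-up label tensor:

import torch

label_ids = torch.tensor([[1, 3], [0, 0]])  # old case: several labels per sample
first_only = label_ids.narrow(1, 0, 1)      # dim 1, start 0, length 1 -> tensor([[1], [0]])
# With a single label per sample, narrowing is no longer needed before label_ids.view(-1).
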
@@ -1232,8 +1227,9 @@ def to_qa_preds(self, top_preds, no_ans_gaps, baskets):
# Iterate over each set of document level prediction
for pred_d, no_ans_gap, basket in zip(top_preds, no_ans_gaps, baskets):

# Unpack document offsets, clear text and squad_id
# Unpack document offsets, clear text and id
token_offsets = basket.samples[0].tokenized["document_offsets"]
pred_id = basket.id_external if basket.id_external else basket.id_internal

# These options reflect the different input dicts that can be assigned to the basket
# before any kind of normalization or preprocessing can happen
@@ -1245,29 +1241,23 @@ def to_qa_preds(self, top_preds, no_ans_gaps, baskets):

# Iterate over each prediction on the one document
full_preds = []
for qa_answer in pred_d:
# This should be a method of Span
pred_str, _, _ = span_to_string(qa_answer.offset_answer_start,
qa_answer.offset_answer_end,
token_offsets,
document_text)
qa_answer.add_answer(pred_str)
full_preds.append(qa_answer)
for qa_candidate in pred_d:
pred_str, _, _ = qa_candidate.span_to_string(token_offsets, document_text)
qa_candidate.add_answer(pred_str)
full_preds.append(qa_candidate)
n_samples = full_preds[0].n_passages_in_doc

pred_id = basket.id_external if basket.id_external else basket.id_internal

curr_doc_pred = QAPred(id=pred_id,

prediction=full_preds,
context=document_text,
question=question,
token_offsets=token_offsets,
context_window_size=self.context_window_size,
aggregation_level="document",
answer_types=[], # TODO
no_answer_gap=no_ans_gap,
n_passages=n_samples
)
n_passages=n_samples)

ret.append(curr_doc_pred)
return ret

@@ -1367,15 +1357,15 @@ def reduce_preds(self, preds):
# Get all predictions in flattened list and sort by score
pos_answers_flat = []
for sample_idx, passage_preds in enumerate(preds):
for qa_answer in passage_preds:
if not (qa_answer.offset_answer_start == -1 and qa_answer.offset_answer_end == -1):
pos_answers_flat.append(QACandidate(offset_answer_start=qa_answer.offset_answer_start,
offset_answer_end=qa_answer.offset_answer_end,
score=qa_answer.score,
answer_type=qa_answer.answer_type,
for qa_candidate in passage_preds:
if not (qa_candidate.offset_answer_start == -1 and qa_candidate.offset_answer_end == -1):
pos_answers_flat.append(QACandidate(offset_answer_start=qa_candidate.offset_answer_start,
offset_answer_end=qa_candidate.offset_answer_end,
score=qa_candidate.score,
answer_type=qa_candidate.answer_type,
offset_unit="token",
aggregation_level="passage",
passage_id=sample_idx,
passage_id=str(sample_idx),
n_passages_in_doc=n_samples)
)

Expand All @@ -1386,7 +1376,7 @@ def reduce_preds(self, preds):
no_ans_gap = -min([nas - pbs for nas, pbs in zip(no_answer_scores, passage_best_score)])

# "no answer" scores and positive answers scores are difficult to compare, because
# + a positive answer score is related to a specific text qa_answer
# + a positive answer score is related to a specific text qa_candidate
# - a "no answer" score is related to all input texts
# Thus we compute the "no answer" score relative to the best possible answer and adjust it by
# the most significant difference between scores.
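
A worked example of the gap computation above, with made-up scores for a document split into two passages:

no_answer_scores   = [4.0, 6.0]   # "no answer" score per passage
passage_best_score = [5.5, 3.0]   # best positive answer score per passage

no_ans_gap = -min([nas - pbs for nas, pbs in zip(no_answer_scores, passage_best_score)])
# differences: [-1.5, 3.0]; min is -1.5; no_ans_gap = 1.5
# A positive gap means the best positive answer outscores "no answer" in at least one passage.
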
@@ -1518,20 +1508,20 @@ def chunk(iterable, lengths):
samples_per_doc = [doc_pred.n_passages for doc_pred in preds_all[0][0]]
cls_preds_grouped = chunk(cls_preds, samples_per_doc)

for qa_doc_pred, cls_preds in zip(qa_preds, cls_preds_grouped):
pred_qa_answers = qa_doc_pred.prediction
pred_qa_answers_new = []
for pred_qa_answer in pred_qa_answers:
passage_id = pred_qa_answer.passage_id
for qa_pred, cls_preds in zip(qa_preds, cls_preds_grouped):
qa_candidates = qa_pred.prediction
qa_candidates_new = []
for qa_candidate in qa_candidates:
passage_id = qa_candidate.passage_id
if passage_id is not None:
cls_pred = cls_preds[passage_id]["label"]
cls_pred = cls_preds[int(passage_id)]["label"]
# i.e. if no_answer
else:
cls_pred = "no_answer"
pred_qa_answer.add_cls(cls_pred)
pred_qa_answers_new.append(pred_qa_answer)
qa_doc_pred.prediction = pred_qa_answers_new
ret.append(qa_doc_pred)
qa_candidate.add_cls(cls_pred)
qa_candidates_new.append(qa_candidate)
qa_pred.prediction = qa_candidates_new
ret.append(qa_pred)
return ret


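To illustrate the str passage_id introduced above: each QACandidate now stores its passage index as a string, so the lookup into the per-passage document-classification predictions converts it back to int, while a passage_id of None marks the document-level no-answer candidate. A tiny sketch with made-up values:

doc_cls_preds = [{"label": "span"}, {"label": "no_answer"}]  # one entry per passage of one document

passage_id = "1"                                     # QACandidate.passage_id is now a str
cls_pred = doc_cls_preds[int(passage_id)]["label"]   # -> "no_answer"

passage_id = None                                    # document-level no-answer candidate
cls_pred = "no_answer"                               # assigned directly, no lookup needed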