
Question Answering improvements - NQ3 (#419)
* unify squad and nq baskets

* Clean id handling

* Add QAInference type hints

* add input_features test
brandenchan authored Jul 3, 2020
1 parent 68cbe01 commit f04c230
Showing 19 changed files with 378 additions and 241 deletions.
6 changes: 3 additions & 3 deletions examples/natural_questions.py
@@ -6,7 +6,7 @@
 from farm.data_handler.data_silo import DataSilo
 from farm.data_handler.processor import NaturalQuestionsProcessor
 from farm.file_utils import fetch_archive_from_http
-from farm.infer import Inferencer
+from farm.infer import QAInferencer
 from farm.modeling.adaptive_model import AdaptiveModel
 from farm.modeling.language_model import LanguageModel
 from farm.modeling.optimization import initialize_optimizer
@@ -68,7 +68,7 @@ def question_answering():
         max_seq_len=384,
         train_filename=train_filename,
         dev_filename=dev_filename,
-        keep_is_impossible=keep_is_impossible,
+        keep_no_answer=keep_is_impossible,
         downsample_context_size=downsample_context_size,
         data_dir=Path("../data/natural_questions"),
     )
@@ -131,7 +131,7 @@ def question_answering():
         }
     ]
 
-    model = Inferencer.load(model_name_or_path="../saved_models/farm/roberta-base-squad2-nq", batch_size=batch_size, gpu=True)
+    model = QAInferencer.load(model_name_or_path="../saved_models/farm/roberta-base-squad2-nq", batch_size=batch_size, gpu=True)
     result = model.inference_from_dicts(dicts=QA_input, return_json=False) # result is a list of QAPred objects
 
     print(f"\nQuestion: Did GameTrailers rated Twilight Princess as one of the best games ever created?"
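For orientation, here is a minimal sketch of how the renamed pieces fit together after this commit. The tokenizer checkpoint and the flag value are illustrative assumptions, not taken from the diff:

from pathlib import Path

from farm.data_handler.processor import NaturalQuestionsProcessor
from farm.infer import QAInferencer
from farm.modeling.tokenization import Tokenizer

# The processor flag is now keep_no_answer (previously keep_is_impossible)
tokenizer = Tokenizer.load(pretrained_model_name_or_path="deepset/roberta-base-squad2")  # assumed checkpoint
processor = NaturalQuestionsProcessor(
    tokenizer=tokenizer,
    max_seq_len=384,
    keep_no_answer=False,
    data_dir=Path("../data/natural_questions"),
)

# QA inference now goes through the dedicated QAInferencer instead of the generic Inferencer
model = QAInferencer.load(model_name_or_path="../saved_models/farm/roberta-base-squad2-nq",
                          batch_size=40, gpu=True)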
4 changes: 2 additions & 2 deletions examples/question_answering.py
@@ -7,7 +7,7 @@
 from farm.data_handler.data_silo import DataSilo
 from farm.data_handler.processor import SquadProcessor
 from farm.data_handler.utils import write_squad_predictions
-from farm.infer import Inferencer
+from farm.infer import QAInferencer
 from farm.modeling.adaptive_model import AdaptiveModel
 from farm.modeling.language_model import LanguageModel
 from farm.modeling.optimization import initialize_optimizer
@@ -110,7 +110,7 @@ def question_answering():
"context": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created."
}]

model = Inferencer.load(save_dir, batch_size=40, gpu=True)
model = QAInferencer.load(save_dir, batch_size=40, gpu=True)
result = model.inference_from_dicts(dicts=QA_input)[0]

pprint.pprint(result)
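The two example scripts now consume the same QAInferencer differently: question_answering.py keeps the default return_json=True and receives plain dicts, while natural_questions.py passes return_json=False and receives QAPred objects. A hedged sketch of both call styles, with an illustrative model path:

from farm.infer import QAInferencer

model = QAInferencer.load("../saved_models/farm/roberta-base-squad2", batch_size=40, gpu=True)

QA_input = [{
    "qas": ["Who counted the game among the best ever made?"],
    "context": "GameTrailers in their review called it one of the greatest games ever created."
}]

# Default (return_json=True): JSON-style dicts, as in examples/question_answering.py
result = model.inference_from_dicts(dicts=QA_input)[0]

# return_json=False: a list of QAPred objects, as in examples/natural_questions.py
preds = model.inference_from_dicts(dicts=QA_input, return_json=False)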
120 changes: 51 additions & 69 deletions farm/data_handler/input_features.py
@@ -70,7 +70,6 @@ def sample_to_features_text(
     input_ids = pad(input_ids, max_seq_len, tokenizer.pad_token_id, pad_on_left=pad_on_left)
     padding_mask = pad(padding_mask, max_seq_len, 0, pad_on_left=pad_on_left)
 
-
     assert len(input_ids) == max_seq_len
     assert len(padding_mask) == max_seq_len
     assert len(segment_ids) == max_seq_len
@@ -307,10 +306,28 @@ def samples_to_features_bert_lm(sample, max_seq_len, tokenizer, next_sent_pred=T
     return [feature_dict]
 
 
-def sample_to_features_qa(sample, tokenizer, max_seq_len, answer_type_list=None, max_answers=6):
+def sample_to_features_qa(sample, tokenizer, max_seq_len, sp_toks_start, sp_toks_mid,
+                          answer_type_list=None, max_answers=6):
     """ Prepares data for processing by the model. Supports cases where there are
     multiple answers for the one question/document pair. max_answers is by default set to 6 since
-    that is the most number of answers in the squad2.0 dev set."""
+    that is the most number of answers in the squad2.0 dev set.
+    :param sample: A Sample object that contains one question / passage pair
+    :type sample: Sample
+    :param tokenizer: A Tokenizer object
+    :type tokenizer: Tokenizer
+    :param max_seq_len: The maximum sequence length
+    :type max_seq_len: int
+    :param sp_toks_start: The number of special tokens that come before the question tokens
+    :type sp_toks_start: int
+    :param sp_toks_mid: The number of special tokens that come between the question and passage tokens
+    :type sp_toks_mid: int
+    :param answer_type_list: A list of all the answer types that can be expected, e.g. ["no_answer", "span", "yes", "no"] for Natural Questions
+    :type answer_type_list: List[str]
+    :param max_answers: The maximum number of answer annotations for a sample (in SQuAD, this is 6, hence the default)
+    :type max_answers: int
+    :return: dict (keys: [input_ids, padding_mask, segment_ids, answer_type_ids, passage_start_t, start_of_word, labels, id, seq_2_start_2])
+    """
 
     # Initialize some basic variables
     question_tokens = sample.tokenized["question_tokens"]
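To make the two new parameters concrete: in a BERT-style layout, [CLS] question [SEP] passage [SEP], one special token precedes the question and one separates question from passage, so sp_toks_start=1 and sp_toks_mid=1; RoBERTa's <s> question </s></s> passage </s> layout gives sp_toks_start=1 and sp_toks_mid=2. A small illustration of the offset arithmetic (the lookup table is hypothetical, not FARM code; the values follow from the layouts above):

# Hypothetical illustration: special-token counts for two common layouts
SPECIAL_TOKEN_COUNTS = {
    "BertTokenizer": (1, 1),     # [CLS] q [SEP] p [SEP]
    "RobertaTokenizer": (1, 2),  # <s> q </s></s> p </s>
}
sp_toks_start, sp_toks_mid = SPECIAL_TOKEN_COUNTS["BertTokenizer"]

# A passage-relative token index start_t then lands at sequence position
# sp_toks_start + question_len_t + sp_toks_mid + start_t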
@@ -329,9 +346,10 @@ def sample_to_features_qa(sample, tokenizer, max_seq_len, answer_type_list=None,
     labels, answer_types = generate_labels(answers,
                                            passage_len_t,
                                            question_len_t,
-                                           tokenizer,
-                                           answer_type_list=answer_type_list,
-                                           max_answers=max_answers)
+                                           max_answers,
+                                           sp_toks_start,
+                                           sp_toks_mid,
+                                           answer_type_list)
 
     # Generate a start of word vector for the full sequence (i.e. question + answer + special tokens).
     # This will allow us to perform evaluation during training without clear text.
@@ -382,10 +400,9 @@ def sample_to_features_qa(sample, tokenizer, max_seq_len, answer_type_list=None,
     if tokenizer.__class__.__name__ in ["XLMRobertaTokenizer", "RobertaTokenizer"]:
         segment_ids = np.zeros_like(segment_ids)
 
-    # Todo: explain how only the first of labels will be used in train, and the full array will be used in eval
-    # TODO Offset, start of word and spec_tok_mask are not actually needed by model.forward() but are needed for model.formatted_preds()
-    # TODO passage_start_t is index of passage's first token relative to document
-    # I don't think we actually need offsets anymore
+    # The first of the labels will be used in train, and the full array will be used in eval.
+    # start of word and spec_tok_mask are not actually needed by model.forward() but are needed for model.formatted_preds()
+    # passage_start_t is index of passage's first token relative to document
     feature_dict = {"input_ids": input_ids,
                     "padding_mask": padding_mask,
                     "segment_ids": segment_ids,
@@ -398,85 +415,50 @@ def sample_to_features_qa(sample, tokenizer, max_seq_len, answer_type_list=None,
     return [feature_dict]
 
 
-def generate_labels(answers, passage_len_t, question_len_t, tokenizer, max_answers, answer_type_list=None):
+def generate_labels(answers, passage_len_t, question_len_t, max_answers,
+                    sp_toks_start, sp_toks_mid, answer_type_list=None):
     """
-    Creates QA label for each answer in answers. The labels are the index of the start and end token
+    Creates a QA label vector for each answer in answers. The labels are the index of the start and end token
     relative to the passage. They are contained in an array of size (max_answers, 2).
-    -1 used to fill array since there the number of answers is often less than max_answers.
+    -1 is used to fill the array since the number of answers is often less than max_answers.
     The index values take into consideration the question tokens, and also special tokens such as [CLS].
     When the answer is not fully contained in the passage, or the question
     is impossible to answer, the start_idx and end_idx are 0 i.e. start and end are on the very first token
-    (in most models, this is the [CLS] token). Note that in our implementation NQ has 4 labels
+    (in most models, this is the [CLS] token). Note that in our implementation NQ has 4 answer types
     ["no_answer", "yes", "no", "span"] and this is what answer_type_list should look like"""
 
+    # Note here that label_idxs get passed to the QuestionAnsweringHead and answer_types get passed to the text
+    # classification head. label_idxs may contain multiple start, end labels since SQuAD dev and test sets
+    # can have multiple annotations. By contrast, Natural Questions only has one annotation per sample,
+    # hence answer_types is only of length 1
     label_idxs = np.full((max_answers, 2), fill_value=-1)
-    answer_types = np.full((max_answers), fill_value=-1)
+    answer_types = np.asarray([-1])
+    answer_str = ""
 
     # If there are no answers
     if len(answers) == 0:
         label_idxs[0, :] = 0
         answer_types[:] = 0
         return label_idxs, answer_types
 
     # Iterate over the answers for the one sample
     for i, answer in enumerate(answers):
-        answer_type = answer["answer_type"]
         start_idx = answer["start_t"]
         end_idx = answer["end_t"]
 
-        # We are going to operate on one-hot label vectors which will later be converted back to label indices.
-        # This is to take advantage of tokenizer.encode_plus() which applies model dependent special token conventions.
-        # The two label vectors (start and end) are composed of sections that correspond to the question and
-        # passage tokens. These are initialized here. The section corresponding to the question
-        # will always be composed of 0s.
-        start_vec_question = [0] * question_len_t
-        end_vec_question = [0] * question_len_t
-        start_vec_passage = [0] * passage_len_t
-        end_vec_passage = [0] * passage_len_t
-
-        # If the answer is in the current passage, populate the label vector with 1s for start and end
+        # Check that the start and end are contained within this passage
         if answer_in_passage(start_idx, end_idx, passage_len_t):
-            start_vec_passage[start_idx] = 1
-            end_vec_passage[end_idx] = 1
-
-        # Combine the sections of the label vectors. The length of each of these will be:
-        # question_len_t + passage_len_t + n_special_tokens
-        start_vec = combine_vecs(start_vec_question,
-                                 start_vec_passage,
-                                 tokenizer,
-                                 spec_tok_val=0)
-        end_vec = combine_vecs(end_vec_question,
-                               end_vec_passage,
-                               tokenizer,
-                               spec_tok_val=0)
-
-        start_label_present = 1 in start_vec
-        end_label_present = 1 in end_vec
-
-        # This is triggered if the answer is not in the passage or the question warrants a no_answer
-        # In both cases, the token at idx=0 (in BERT, this is the [CLS] token) is given both the start and end label
-        if start_label_present is False and end_label_present is False:
-            start_vec[0] = 1
-            end_vec[0] = 1
-            answer_type = "no_answer"
-        elif start_label_present is False or end_label_present is False:
-            raise Exception("The label vectors are lacking either a start or end label")
-
-        # Ensure label vectors are one-hot
-        assert sum(start_vec) == 1
-        assert sum(end_vec) == 1
-
-        start_idx = start_vec.index(1)
-        end_idx = end_vec.index(1)
-
-        label_idxs[i, 0] = start_idx
-        label_idxs[i, 1] = end_idx
-
-        # Only Natural Questions trains a classification head on answer_type, SQuAD only has the QA head. answer_type_list
-        # will be None for SQuAD but something like ["no_answer", "span", "yes", "no"] for Natural Questions
-        if answer_type_list:
-            answer_types[i] = answer_type_list.index(answer_type)
-
-    assert np.max(label_idxs) > -1
+            label_idxs[i][0] = sp_toks_start + question_len_t + sp_toks_mid + start_idx
+            label_idxs[i][1] = sp_toks_start + question_len_t + sp_toks_mid + end_idx
+            answer_str = answer["answer_type"]
+        # If the start or end of the span answer is outside the passage, treat passage as no_answer
+        else:
+            label_idxs[i][0] = 0
+            label_idxs[i][1] = 0
+            answer_str = "no_answer"
+
+    if answer_type_list:
+        answer_types[0] = answer_type_list.index(answer_str)
 
     return label_idxs, answer_types

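As a sanity check on the new labeling scheme in generate_labels, here is a worked example under assumed values (BERT layout; the numbers are invented for illustration):

import numpy as np

# Assumed setup: [CLS] + 5 question tokens + [SEP]; answer spans passage tokens 3..7
sp_toks_start, sp_toks_mid = 1, 1
question_len_t = 5
max_answers = 6

label_idxs = np.full((max_answers, 2), fill_value=-1)
start_t, end_t = 3, 7

# Same arithmetic as the new generate_labels: shift the passage-relative indices
# past the leading special token, the question tokens and the separator
label_idxs[0][0] = sp_toks_start + question_len_t + sp_toks_mid + start_t  # == 10
label_idxs[0][1] = sp_toks_start + question_len_t + sp_toks_mid + end_t   # == 14

# Natural Questions additionally encodes the single annotated answer type as an index
answer_type_list = ["no_answer", "span", "yes", "no"]
answer_types = np.asarray([answer_type_list.index("span")])  # array([1])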

