Question Answering improvements - NQ3 #419

Merged Jul 3, 2020
Changes from all commits (47 commits)
b8e553d
unify squad and nq baskets
brandenchan Jun 16, 2020
39bebc2
attempt at simplifying ids
brandenchan Jun 17, 2020
eb6834a
clean id handling
brandenchan Jun 17, 2020
24b5747
Better handling of different input dicts
brandenchan Jun 18, 2020
08b4d33
Merge branch 'master' into nq_2
brandenchan Jun 18, 2020
c41ed34
apply_tokenization merged
brandenchan Jun 18, 2020
2508f8c
clean apply_tokenization
brandenchan Jun 18, 2020
04f9d7b
Merge branch 'master' into nq_2
brandenchan Jun 22, 2020
e7b4e41
Rename samples to passages
brandenchan Jun 22, 2020
c13a890
Merge branch 'master' into nq_2
brandenchan Jun 22, 2020
56a7127
Clean id handling
brandenchan Jun 22, 2020
c33ce4d
rename is_impossible to no_answer
brandenchan Jun 22, 2020
55e0ad2
Rename preds_p to preds
brandenchan Jun 22, 2020
1308712
Add QAInference type hints
brandenchan Jun 22, 2020
61c5d7b
Adjust examples to new changes
brandenchan Jun 22, 2020
335b087
Fix type hint error
brandenchan Jun 22, 2020
abb4130
Check that label character index matches label str
brandenchan Jun 23, 2020
cb893b7
Minor improvements
brandenchan Jun 23, 2020
1b9e641
Enforce single label doc cls in preprocessing
brandenchan Jun 24, 2020
27e12ee
Refactor span_to_string, clean predictions objects
brandenchan Jun 24, 2020
61bc193
Remove unneccessary iteration
brandenchan Jun 24, 2020
acf8358
WIP clean and document predictions.py
brandenchan Jun 24, 2020
102763f
Add documentation of Pred objects
brandenchan Jun 25, 2020
18fb8bb
Merge branch 'master' into more_improvements
brandenchan Jun 25, 2020
e3d4bb6
Fix list index bug
brandenchan Jun 25, 2020
e619e2e
Merge branch 'more_improvements' of https://github.com/deepset-ai/FAR…
brandenchan Jun 25, 2020
aa3333c
Fix index in test sample
brandenchan Jun 25, 2020
1af739f
Refactor data check
brandenchan Jun 25, 2020
0d09698
Fix docstring
brandenchan Jun 25, 2020
0725e37
Simplify QA generate_labels()
brandenchan Jun 30, 2020
35c67f0
Rename internal methods
brandenchan Jun 30, 2020
28a66e7
update docstring
brandenchan Jun 30, 2020
651f8af
add input_features test
brandenchan Jul 1, 2020
2867a75
Add docstring
brandenchan Jul 1, 2020
605bf5b
Merge branch 'master' into more_improvements
brandenchan Jul 1, 2020
3446ff8
Fix import and error handling
brandenchan Jul 1, 2020
bb51fc9
Merge branch 'more_improvements' of https://github.com/deepset-ai/FAR…
brandenchan Jul 1, 2020
71738d2
Fix answer check
brandenchan Jul 1, 2020
41ec428
Fix sample check
brandenchan Jul 1, 2020
74c6e9c
move sample check to _sample_to_features
brandenchan Jul 1, 2020
171db3e
Pass QA inferencer args properly
brandenchan Jul 2, 2020
b4825f0
Rename span to qa_candidate
brandenchan Jul 2, 2020
fce94c7
Arg passing error causing Eval bug
brandenchan Jul 2, 2020
07b84e6
Fix bug in answer check
brandenchan Jul 2, 2020
473bf63
remove import
brandenchan Jul 2, 2020
ea76400
Remove reference to SampleError
brandenchan Jul 2, 2020
7d16016
Fix onnx sample
brandenchan Jul 2, 2020
6 changes: 3 additions & 3 deletions examples/natural_questions.py
@@ -6,7 +6,7 @@
from farm.data_handler.data_silo import DataSilo
from farm.data_handler.processor import NaturalQuestionsProcessor
from farm.file_utils import fetch_archive_from_http
from farm.infer import Inferencer
from farm.infer import QAInferencer
from farm.modeling.adaptive_model import AdaptiveModel
from farm.modeling.language_model import LanguageModel
from farm.modeling.optimization import initialize_optimizer
@@ -68,7 +68,7 @@ def question_answering():
max_seq_len=384,
train_filename=train_filename,
dev_filename=dev_filename,
keep_is_impossible=keep_is_impossible,
keep_no_answer=keep_is_impossible,
downsample_context_size=downsample_context_size,
data_dir=Path("../data/natural_questions"),
)
@@ -131,7 +131,7 @@ def question_answering():
}
]

model = Inferencer.load(model_name_or_path="../saved_models/farm/roberta-base-squad2-nq", batch_size=batch_size, gpu=True)
model = QAInferencer.load(model_name_or_path="../saved_models/farm/roberta-base-squad2-nq", batch_size=batch_size, gpu=True)
result = model.inference_from_dicts(dicts=QA_input, return_json=False) # result is a list of QAPred objects

print(f"\nQuestion: Did GameTrailers rated Twilight Princess as one of the best games ever created?"
4 changes: 2 additions & 2 deletions examples/question_answering.py
@@ -7,7 +7,7 @@
from farm.data_handler.data_silo import DataSilo
from farm.data_handler.processor import SquadProcessor
from farm.data_handler.utils import write_squad_predictions
from farm.infer import Inferencer
from farm.infer import QAInferencer
from farm.modeling.adaptive_model import AdaptiveModel
from farm.modeling.language_model import LanguageModel
from farm.modeling.optimization import initialize_optimizer
@@ -110,7 +110,7 @@ def question_answering():
"context": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created."
}]

model = Inferencer.load(save_dir, batch_size=40, gpu=True)
model = QAInferencer.load(save_dir, batch_size=40, gpu=True)
result = model.inference_from_dicts(dicts=QA_input)[0]

pprint.pprint(result)
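
For orientation, both example diffs above boil down to swapping the generic Inferencer for the new QAInferencer. Below is a minimal inference sketch: QAInferencer.load, inference_from_dicts, batch_size, the model path, and the return_json=False / QAPred behaviour are all taken from the diffs above, while the "qas" input key is an assumption for illustration.

from farm.infer import QAInferencer

# Input dict format: "context" is visible in the hunk above; the "qas" key is assumed here.
QA_input = [{
    "qas": ["Did GameTrailers rated Twilight Princess as one of the best games ever created?"],
    "context": "Twilight Princess was released to universal critical acclaim and commercial success. "
               "GameTrailers in their review called it one of the greatest games ever created."
}]

# Model path as in the natural_questions.py example above; any FARM QA checkpoint should work here.
model = QAInferencer.load("../saved_models/farm/roberta-base-squad2-nq", batch_size=40, gpu=True)

# With return_json=False the result is a list of QAPred objects rather than plain dicts.
result = model.inference_from_dicts(dicts=QA_input, return_json=False)
print(result[0])
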
120 changes: 51 additions & 69 deletions farm/data_handler/input_features.py
@@ -70,7 +70,6 @@ def sample_to_features_text(
input_ids = pad(input_ids, max_seq_len, tokenizer.pad_token_id, pad_on_left=pad_on_left)
padding_mask = pad(padding_mask, max_seq_len, 0, pad_on_left=pad_on_left)


assert len(input_ids) == max_seq_len
assert len(padding_mask) == max_seq_len
assert len(segment_ids) == max_seq_len
@@ -307,10 +306,28 @@ def samples_to_features_bert_lm(sample, max_seq_len, tokenizer, next_sent_pred=T
return [feature_dict]


def sample_to_features_qa(sample, tokenizer, max_seq_len, answer_type_list=None, max_answers=6):
def sample_to_features_qa(sample, tokenizer, max_seq_len, sp_toks_start, sp_toks_mid,
answer_type_list=None, max_answers=6):
""" Prepares data for processing by the model. Supports cases where there are
multiple answers for the one question/document pair. max_answers is by default set to 6 since
that is the most number of answers in the squad2.0 dev set."""
that is the maximum number of answers in the SQuAD 2.0 dev set.

:param sample: A Sample object that contains one question / passage pair
:type sample: Sample
:param tokenizer: A Tokenizer object
:type tokenizer: Tokenizer
:param max_seq_len: The maximum sequence length
:type max_seq_len: int
:param sp_toks_start: The number of special tokens that come before the question tokens
:type sp_toks_start: int
:param sp_toks_mid: The number of special tokens that come between the question and passage tokens
:type sp_toks_mid: int
:param answer_type_list: A list of all the answer types that can be expected e.g. ["no_answer", "span", "yes", "no"] for Natural Questions
:type answer_type_list: List[str]
:param max_answers: The maximum number of answer annotations for a sample (In SQuAD, this is 6 hence the default)
:type max_answers: int
:return: dict (keys: [input_ids, padding_mask, segment_ids, answer_type_ids, passage_start_t, start_of_word, labels, id, seq_2_start_2])
"""

# Initialize some basic variables
question_tokens = sample.tokenized["question_tokens"]
@@ -329,9 +346,10 @@ def sample_to_features_qa(sample, tokenizer, max_seq_len, answer_type_list=None,
labels, answer_types = generate_labels(answers,
passage_len_t,
question_len_t,
tokenizer,
answer_type_list=answer_type_list,
max_answers=max_answers)
max_answers,
sp_toks_start,
sp_toks_mid,
answer_type_list)

# Generate a start of word vector for the full sequence (i.e. question + answer + special tokens).
# This will allow us to perform evaluation during training without clear text.
@@ -382,10 +400,9 @@ def sample_to_features_qa(sample, tokenizer, max_seq_len, answer_type_list=None,
if tokenizer.__class__.__name__ in ["XLMRobertaTokenizer", "RobertaTokenizer"]:
segment_ids = np.zeros_like(segment_ids)

# Todo: explain how only the first of labels will be used in train, and the full array will be used in eval
# TODO Offset, start of word and spec_tok_mask are not actually needed by model.forward() but are needed for model.formatted_preds()
# TODO passage_start_t is index of passage's first token relative to document
# I don't think we actually need offsets anymore
# The first of the labels will be used in train, and the full array will be used in eval.
# start_of_word and spec_tok_mask are not actually needed by model.forward() but are needed for model.formatted_preds()
# passage_start_t is the index of the passage's first token relative to the document
feature_dict = {"input_ids": input_ids,
"padding_mask": padding_mask,
"segment_ids": segment_ids,
@@ -398,85 +415,50 @@ def sample_to_features_qa(sample, tokenizer, max_seq_len, answer_type_list=None,
return [feature_dict]


def generate_labels(answers, passage_len_t, question_len_t, tokenizer, max_answers, answer_type_list=None):
def generate_labels(answers, passage_len_t, question_len_t, max_answers,
sp_toks_start, sp_toks_mid, answer_type_list=None):
"""
Creates QA label for each answer in answers. The labels are the index of the start and end token
Creates a QA label vector for each answer in answers. The labels are the indices of the start and end tokens
relative to the passage. They are contained in an array of size (max_answers, 2).
-1 used to fill array since there the number of answers is often less than max_answers.
-1 is used to fill the array since the number of answers is often less than max_answers.
The index values take into consideration the question tokens and also special tokens such as [CLS].
When the answer is not fully contained in the passage, or the question
is impossible to answer, the start_idx and end_idx are 0 i.e. start and end are on the very first token
(in most models, this is the [CLS] token). Note that in our implementation NQ has 4 labels
(in most models, this is the [CLS] token). Note that in our implementation NQ has 4 answer types
["no_answer", "yes", "no", "span"] and this is what answer_type_list should look like"""

# Note here that label_idxs get passed to the QuestionAnsweringHead and answer_types get passed to the text
# classification head. label_idxs may contain multiple start, end labels since SQuAD dev and test sets
# can have multiple annotations. By contrast, Natural Questions only has one annotation per sample, which is
# why answer_types is only of length 1
label_idxs = np.full((max_answers, 2), fill_value=-1)
answer_types = np.full((max_answers), fill_value=-1)
answer_types = np.asarray([-1])
answer_str = ""

# If there are no answers
if len(answers) == 0:
label_idxs[0, :] = 0
answer_types[:] = 0
return label_idxs, answer_types

# Iterate over the answers for the one sample
for i, answer in enumerate(answers):
answer_type = answer["answer_type"]
start_idx = answer["start_t"]
end_idx = answer["end_t"]

# We are going to operate on one-hot label vectors which will later be converted back to label indices.
# This is to take advantage of tokenizer.encode_plus() which applies model dependent special token conventions.
# The two label vectors (start and end) are composed of sections that correspond to the question and
# passage tokens. These are initialized here. The section corresponding to the question
# will always be composed of 0s.
start_vec_question = [0] * question_len_t
end_vec_question = [0] * question_len_t
start_vec_passage = [0] * passage_len_t
end_vec_passage = [0] * passage_len_t

# If the answer is in the current passage, populate the label vector with 1s for start and end
# Check that the start and end are contained within this passage
if answer_in_passage(start_idx, end_idx, passage_len_t):
start_vec_passage[start_idx] = 1
end_vec_passage[end_idx] = 1

# Combine the sections of the label vectors. The length of each of these will be:
# question_len_t + passage_len_t + n_special_tokens
start_vec = combine_vecs(start_vec_question,
start_vec_passage,
tokenizer,
spec_tok_val=0)
end_vec = combine_vecs(end_vec_question,
end_vec_passage,
tokenizer,
spec_tok_val=0)

start_label_present = 1 in start_vec
end_label_present = 1 in end_vec

# This is triggered if the answer is not in the passage or the question warrants a no_answer
# In both cases, the token at idx=0 (in BERT, this is the [CLS] token) is given both the start and end label
if start_label_present is False and end_label_present is False:
start_vec[0] = 1
end_vec[0] = 1
answer_type = "no_answer"
elif start_label_present is False or end_label_present is False:
raise Exception("The label vectors are lacking either a start or end label")

# Ensure label vectors are one-hot
assert sum(start_vec) == 1
assert sum(end_vec) == 1

start_idx = start_vec.index(1)
end_idx = end_vec.index(1)

label_idxs[i, 0] = start_idx
label_idxs[i, 1] = end_idx

# Only Natural Questions trains a classification head on answer_type, SQuAD only has the QA head. answer_type_list
# will be None for SQuAD but something like ["no_answer", "span", "yes", "no"] for Natural Questions
if answer_type_list:
answer_types[i] = answer_type_list.index(answer_type)

assert np.max(label_idxs) > -1
label_idxs[i][0] = sp_toks_start + question_len_t + sp_toks_mid + start_idx
label_idxs[i][1] = sp_toks_start + question_len_t + sp_toks_mid + end_idx
answer_str = answer["answer_type"]
# If the start or end of the span answer is outside the passage, treat passage as no_answer
else:
label_idxs[i][0] = 0
label_idxs[i][1] = 0
answer_str = "no_answer"

if answer_type_list:
answer_types[0] = answer_type_list.index(answer_str)

return label_idxs, answer_types
