Fix and document Inferencer usage and pool handling #429

Merged
7 commits merged on Jul 23, 2020
1 change: 1 addition & 0 deletions examples/conversion_huggingface_models.py
@@ -24,6 +24,7 @@ def convert_from_transformers():
"text": "The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks."}]
result = nlp.inference_from_dicts(dicts=QA_input, rest_api_schema=True)
pprint.pprint(result)
nlp.close_multiprocessing_pool()

# save it
farm_model_dir = Path("../saved_models/bert-english-qa-large")
1 change: 1 addition & 0 deletions examples/conversion_huggingface_models_classification.py
@@ -26,6 +26,7 @@ def convert_from_transformers():
# # run predictions
result = nlp.inference_from_dicts(dicts=[{"text": "Was ein scheiß Nazi!"}], rest_api_schema=True)
pprint.pprint(result)
nlp.close_multiprocessing_pool()

# save it
nlp.save(farm_output_dir)
3 changes: 2 additions & 1 deletion examples/doc_classification.py
@@ -97,7 +97,7 @@ def doc_classifcation():
device=device)

# 7. Let it grow
trainer.train()
# trainer.train()

# 8. Hooray! You have a model. Store it:
save_dir = Path("saved_models/bert-german-doc-tutorial")
@@ -112,6 +112,7 @@ def doc_classifcation():
model = Inferencer.load(save_dir)
result = model.inference_from_dicts(dicts=basic_texts)
print(result)
model.close_multiprocessing_pool()


if __name__ == "__main__":
2 changes: 1 addition & 1 deletion examples/doc_classification_cola.py
@@ -111,7 +111,7 @@ def doc_classification_cola():
model = Inferencer.load(save_dir)
result = model.inference_from_dicts(dicts=basic_texts)
print(result)

model.close_multiprocessing_pool()

if __name__ == "__main__":
doc_classification_cola()
1 change: 1 addition & 0 deletions examples/doc_classification_custom_optimizer.py
@@ -140,6 +140,7 @@ def doc_classifcation():
model = Inferencer.load(save_dir)
result = model.inference_from_dicts(dicts=basic_texts)
print(result)
model.close_multiprocessing_pool()


if __name__ == "__main__":
1 change: 1 addition & 0 deletions examples/doc_classification_multilabel.py
@@ -112,6 +112,7 @@ def doc_classification_multilabel():
model = Inferencer.load(save_dir)
result = model.inference_from_dicts(dicts=basic_texts)
print(result)
model.close_multiprocessing_pool()


if __name__ == "__main__":
1 change: 1 addition & 0 deletions examples/doc_classification_multilabel_roberta.py
@@ -114,6 +114,7 @@ def doc_classification_multilabel_roberta():
model = Inferencer.load(save_dir)
result = model.run_inference(dicts=basic_texts)
print(result)
model.close_multiprocessing_pool()


if __name__ == "__main__":
2 changes: 2 additions & 0 deletions examples/doc_classification_with_earlystopping.py
@@ -149,13 +149,15 @@ def mymetrics(preds, labels):
model = Inferencer.load(save_dir)
result = model.inference_from_dicts(dicts=basic_texts)
print(result)
model.close_multiprocessing_pool()

# Load from saved best model
print("LOADING INFERENCER FROM BEST MODEL DURING TRAINING")
model = Inferencer.load(earlystopping.save_dir)
result = model.inference_from_dicts(dicts=basic_texts)
print("APPLICATION ON BEST MODEL")
print(result)
model.close_multiprocessing_pool()


if __name__ == "__main__":
1 change: 1 addition & 0 deletions examples/doc_regression.py
@@ -102,6 +102,7 @@ def doc_regression():
result = model.inference_from_dicts(dicts=basic_texts)

print(result)
model.close_multiprocessing_pool()


if __name__ == "__main__":
2 changes: 2 additions & 0 deletions examples/embeddings_extraction.py
@@ -23,6 +23,8 @@ def embeddings_extraction():
# Get embeddings for input text (you can vary the strategy and layer)
result = model.inference_from_dicts(dicts=basic_texts)
print(result)
model.close_multiprocessing_pool()


if __name__ == "__main__":
embeddings_extraction()
4 changes: 3 additions & 1 deletion examples/embeddings_extraction_s3e_pooling.py
@@ -74,6 +74,7 @@ def fit(language_model, corpus_path, save_dir, do_lower_case, batch_size=4, use_
# Get embeddings for input text (you can vary the strategy and layer)
result = inferencer.inference_from_dicts(dicts=basic_texts)
print(result)
inferencer.close_multiprocessing_pool()


def extract_embeddings(load_dir, use_gpu, batch_size):
@@ -94,12 +94,13 @@ def extract_embeddings(load_dir, use_gpu, batch_size):
# Get embeddings for input text
result = inferencer.inference_from_dicts(dicts=basic_texts)
print(result)
inferencer.close_multiprocessing_pool()


if __name__ == "__main__":
lang_model = "glove-english-uncased-6B"
do_lower_case = True

# You can download this from:
# "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-downstream/lm_finetune_nips.tar.gz"
corpus_path = Path("../data/lm_finetune_nips/train.txt")
34 changes: 34 additions & 0 deletions examples/inferencer_multiprocessing.py
@@ -0,0 +1,34 @@
import pprint

from farm.infer import Inferencer


def inference_with_multiprocessing():
"""
The Inferencers(Inferencer/QAInferencer) create a multiprocessing Pool during the init, if the num_process argument
is set greater than 0. This helps speed up pre-processing that happens on the CPU, before the model's forward pass
on GPU(or CPU).

Having the pool at the Inferencer level allows re-use across multiple inference requests. However, it needs to be
closed properly to ensure there are no memory-leaks.

For production environments, the Inferencer object can be wrapped in a try-finally block like in this example to
ensure the Pool is closed even in the case of errors.
"""

try:
model = Inferencer.load("deepset/roberta-base-squad2", batch_size=40, task_type="question_answering", gpu=True)
QA_input = [
{
"qas": ["Who counted the game among the best ever made?"],
"context": "Twilight Princess was released to universal critical acclaim and commercial success. It received perfect scores from major publications such as 1UP.com, Computer and Video Games, Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called it one of the greatest games ever created."
}]
result = model.inference_from_dicts(dicts=QA_input)[0]

pprint.pprint(result)
finally:
model.close_multiprocessing_pool()


if __name__ == "__main__":
inference_with_multiprocessing()
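A side note on this example: the try-finally pattern above can also be packaged once as a context manager, so callers cannot forget the cleanup call. The sketch below is an illustration only, not part of this PR; it assumes nothing beyond the Inferencer.load and close_multiprocessing_pool APIs used above, and the pooled_inferencer helper name is made up for the example.

from contextlib import contextmanager

from farm.infer import Inferencer


@contextmanager
def pooled_inferencer(*load_args, **load_kwargs):
    # Load first, so a failed load does not leave an undefined name in the cleanup path.
    model = Inferencer.load(*load_args, **load_kwargs)
    try:
        yield model
    finally:
        # Runs on normal exit and on error, so the pool is always closed.
        model.close_multiprocessing_pool()


# Usage: the pool is closed automatically when the with-block exits.
# with pooled_inferencer("deepset/roberta-base-squad2", task_type="question_answering", gpu=True) as model:
#     pprint.pprint(model.inference_from_dicts(dicts=QA_input))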
1 change: 1 addition & 0 deletions examples/natural_questions.py
@@ -136,6 +136,7 @@ def question_answering():

print(f"\nQuestion: Did GameTrailers rated Twilight Princess as one of the best games ever created?"
f"\nAnswer from model: {result[0].prediction[0].answer}")
model.close_multiprocessing_pool()

if __name__ == "__main__":
question_answering()
2 changes: 2 additions & 0 deletions examples/ner.py
@@ -103,6 +103,8 @@ def ner():
result = model.inference_from_dicts(dicts=basic_texts)
print(result)

model.close_multiprocessing_pool()


if __name__ == "__main__":
ner()
1 change: 1 addition & 0 deletions examples/onnx_question_answering.py
@@ -33,6 +33,7 @@ def onnx_runtime_example():

results = inferencer.inference_from_dicts(qa_input)
print(results)
inferencer.close_multiprocessing_pool()


if __name__ == "__main__":
2 changes: 2 additions & 0 deletions examples/passage_ranking.py
@@ -136,6 +136,8 @@ def text_pair_classification():
qrels_file=data_dir / qrels_filename,
output_file=save_dir / predictions_filename)

model.close_multiprocessing_pool()


if __name__ == "__main__":
text_pair_classification()
1 change: 1 addition & 0 deletions examples/question_answering.py
@@ -114,6 +114,7 @@ def question_answering():
result = model.inference_from_dicts(dicts=QA_input)[0]

pprint.pprint(result)
model.close_multiprocessing_pool()

# 10. Do Inference on whole SQuAD Dataset & write the predictions file to disk
filename = os.path.join(processor.data_dir,processor.dev_filename)
2 changes: 2 additions & 0 deletions examples/streaming_inference.py
@@ -18,6 +18,8 @@ def streaming_inference_example():
for prediction in results: # results is a generator object that yields predictions
print(prediction)

inferencer.close_multiprocessing_pool()


def sample_dicts_generator():
"""
2 changes: 2 additions & 0 deletions examples/text_pair_classification.py
@@ -108,6 +108,8 @@ def text_pair_classification():
result = model.inference_from_dicts(dicts=basic_texts)

print(result)
model.close_multiprocessing_pool()


if __name__ == "__main__":
text_pair_classification()
1 change: 1 addition & 0 deletions examples/wordembedding_inference.py
@@ -62,6 +62,7 @@ def embedding_extraction():
extraction_layer=-1
)
print(result)
inferencer.close_multiprocessing_pool()


if __name__ == "__main__":
1 change: 1 addition & 0 deletions farm/inference_rest_api.py
@@ -27,6 +27,7 @@

INFERENCERS = {}
for idx, model_dir in enumerate(model_paths):
# Multiprocessing is disabled here (num_processes=0); refer to examples/inferencer_multiprocessing.py for how to use multiprocessing in the Inferencers.
INFERENCERS[idx + 1] = Inferencer.load(str(model_dir), num_processes=0)

app = Flask(__name__)
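Worth noting about this choice: with num_processes=0 the Inferencer skips pool creation entirely, so a long-lived service like this REST API has nothing to close. Explicit cleanup is only needed when a pool exists, i.e. when num_processes is greater than 0. A minimal sketch for contrast (illustration only, not part of this PR; the num_processes value and input dict are arbitrary):

inferencer = Inferencer.load(str(model_dir), num_processes=4)  # creates a multiprocessing pool
try:
    result = inferencer.inference_from_dicts(dicts=[{"text": "some input"}])
finally:
    inferencer.close_multiprocessing_pool()  # required once a pool exists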
11 changes: 7 additions & 4 deletions farm/modeling/wordembedding_utils.py
@@ -605,10 +605,13 @@ def fit_s3e_on_corpus(processor, model, corpus, n_clusters=10,
sentences = [{"text": s} for s in corpus.split("\n") if len(s.strip()) > 0]

# Get embeddings
inferencer = Inferencer(model=model, processor=processor, task_type="embeddings", gpu=use_gpu,
batch_size=batch_size, extraction_strategy="s3e", extraction_layer=-1,
s3e_stats=s3e_stats)
result = inferencer.inference_from_dicts(dicts=sentences)
# Construct before the try block, so a failed init does not trigger the cleanup path.
inferencer = Inferencer(model=model, processor=processor, task_type="embeddings", gpu=use_gpu,
                        batch_size=batch_size, extraction_strategy="s3e", extraction_layer=-1,
                        s3e_stats=s3e_stats)
try:
    result = inferencer.inference_from_dicts(dicts=sentences)
finally:
    inferencer.close_multiprocessing_pool()
sentence_embeddings = [s["vec"] for s in result]
sentence_embeddings = np.vstack(sentence_embeddings)

12 changes: 8 additions & 4 deletions test/benchmarks/conftest.py
@@ -17,8 +17,12 @@ def onnx_adaptive_model_qa(use_gpu, num_processes, model_name_or_path="deepset/b
)
model.convert_to_onnx(onnx_model_path)

model = Inferencer.load(
onnx_model_path, task_type="question_answering", batch_size=1, num_processes=num_processes, gpu=use_gpu
)
model = Inferencer.load(
    onnx_model_path, task_type="question_answering", batch_size=1, num_processes=num_processes, gpu=use_gpu
)
try:
    # pytest runs the code after "yield" during fixture teardown.
    yield model
finally:
    if num_processes != 0:
        model.close_multiprocessing_pool()
3 changes: 2 additions & 1 deletion test/benchmarks/question_answering_accuracy.py
@@ -88,6 +88,7 @@ def test_evaluation():
filename = data_dir / evaluation_filename
result = model.inference_from_file(file=filename, return_json=False, multiprocessing_chunksize=80)
results_squad = [x.to_squad_eval() for x in result]
model.close_multiprocessing_pool()

elapsed = time() - starttime

@@ -215,4 +216,4 @@ def train_evaluation_single(seed=42):

test_evaluation()

train_evaluation_single(seed=42)
train_evaluation_single(seed=42)