diff --git a/test/benchmarks/question_answering_components.py b/test/benchmarks/question_answering_components.py
index fb9e2f8ce..4aad9f7a0 100644
--- a/test/benchmarks/question_answering_components.py
+++ b/test/benchmarks/question_answering_components.py
@@ -32,6 +32,7 @@ def benchmark(params, output="results_component_test.csv"):
     ds = generate_param_dicts(params)
     print(f"Running {len(ds)} benchmarks...")
     results = []
+    warmup_run()
     for d in tqdm(ds):
         result = benchmark_single(**d)
         results.append(result)
@@ -40,6 +41,25 @@ def benchmark(params, output="results_component_test.csv"):
     df.to_csv(output)
 
 
+def warmup_run():
+    """ This run warms up the GPU. We saw cases where the first run in the loop took longer or showed different
+    time profile characteristics. This warm-up run is intended to reduce this kind of fluctuation. """
+    question = [l[:-1] for l in open(questions_file)][0]
+    document_size = 100_000
+    input_dict = prepare_dict(sample_file, question, document_size)
+    # Run once with real prediction heads
+    inferencer = Inferencer.load("deepset/bert-base-cased-squad2",
+                                 batch_size=16,
+                                 gpu=True,
+                                 task_type=task_type,
+                                 max_seq_len=384,
+                                 num_processes=num_processes,
+                                 doc_stride=128,
+                                 dummy_ph=False,
+                                 benchmarking=True)
+    inferencer.inference_from_dicts(input_dict)
+
+
 def benchmark_single(batch_size, gpu, max_seq_len, doc_stride, document_size, question, modelname):
     try:
         input_dict = prepare_dict(sample_file, question, document_size)