From 95ed6bb94e720b0e7fddc1414e83fe1aadb9e6de Mon Sep 17 00:00:00 2001 From: Anna Petrasova Date: Wed, 2 Feb 2022 00:11:24 -0500 Subject: [PATCH] grass.benchmark: shuffle runs for benchmarking by number of cores To avoid bias when other processes are running at the same time --- python/grass/benchmark/runners.py | 36 +++++++++++++++++-------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/python/grass/benchmark/runners.py b/python/grass/benchmark/runners.py index ddf5d207cc9..715e50523dc 100644 --- a/python/grass/benchmark/runners.py +++ b/python/grass/benchmark/runners.py @@ -15,6 +15,7 @@ """Basic functions for benchmarking modules""" import shutil +import random from types import SimpleNamespace import grass.script as gs @@ -68,7 +69,7 @@ def benchmark_single(module, label, repeat=5): ) -def benchmark_nprocs(module, label, max_nprocs, repeat=5): +def benchmark_nprocs(module, label, max_nprocs, repeat=5, shuffle=True): """Benchmark module using values of nprocs up to *max_nprocs*. *module* is an instance of PyGRASS Module class or any object which @@ -82,6 +83,8 @@ def benchmark_nprocs(module, label, max_nprocs, repeat=5): to generate a continuous range of integer values from 1 up to *max_nprocs*. *repeat* sets how many times the each run is repeated. So, the module will run ``max_nprocs * repeat`` times. + Runs are executed in random order, set *shuffle* to false if they + need to be executed in order based on number of threads. *label* is a text to add to the result (for user-facing display). Optional *nprocs* is passed to the module if present. @@ -102,27 +105,28 @@ def benchmark_nprocs(module, label, max_nprocs, repeat=5): avg_times = [] all_times = [] nprocs_list = list(range(1, max_nprocs + 1)) - for nprocs in nprocs_list: - print("\u2500" * term_size.columns) - print(f"Benchmark with {nprocs} thread(s)...\n") - time_sum = 0 - measured_times = [] - for _ in range(repeat): - module.update(nprocs=nprocs) - module.run() - print(f"{module.time}s") - time_sum += module.time - measured_times.append(module.time) - - avg = time_sum / repeat + nprocs_list_shuffled = sorted(nprocs_list * repeat) + if shuffle: + random.shuffle(nprocs_list_shuffled) + times = {} + print("\u2500" * term_size.columns) + for nprocs in nprocs_list_shuffled: + module.update(nprocs=nprocs) + module.run() + print(f"Run with {nprocs} thread(s) took {module.time}s\n") + if nprocs in times: + times[nprocs] += [module.time] + else: + times[nprocs] = [module.time] + for nprocs in sorted(times): + avg = sum(times[nprocs]) / repeat avg_times.append(avg) - all_times.append(measured_times) + all_times.append(times[nprocs]) if nprocs == 1: serial_avg = avg if avg < min_avg: min_avg = avg min_time = nprocs - print(f"\nResult - {avg}s") print("\u2500" * term_size.columns) if serial_avg is not None: