grass.benchmark: Compute speedup and enable plotting speedup/efficiency (OSGeo#3835)

* add speedup in results, enable nprocs to plot speed or efficiency
* replace simple namespace with dictionary
* replace simplenamespace with dict in benchmark
* update tests for replacements
* revert unnecessary changes, initialize a simplenamespace result
* add a unittest for benchmark_nprocs
* Update cli and its test
* create a new axis for plot instead of using an existing one

Co-authored-by: Anna Petrasova <kratochanna@gmail.com>

* correct the formula for speedup

Co-authored-by: Anna Petrasova <kratochanna@gmail.com>

* add parameter doc in the plot function

---------

Co-authored-by: Anna Petrasova <kratochanna@gmail.com>
a0x8o · Jun 17, 2024 · effa231 · effa231
1 parent bfd472d
commit effa231
Show file tree

Hide file tree

Showing 5 changed files with 128 additions and 21 deletions.
diff --git a/python/grass/benchmark/app.py b/python/grass/benchmark/app.py
@@ -167,6 +167,7 @@ def plot_nprocs_cli(args):
         results.results,
         filename=args.output,
         title=args.title,
+        metric=args.metric,
     )
 
 
@@ -326,9 +327,17 @@ def add_results_subcommand(parent_subparsers):
 
 def add_plot_io_arguments(parser):
     """Add input and output arguments to *parser*."""
-    parser.add_argument("input", help="file with results (JSON)", metavar="input_file")
     parser.add_argument(
-        "output", help="output file (e.g., PNG)", nargs="?", metavar="output_file"
+        "input", help="file with results (e.g. results.json)", metavar="input_file"
+    )
+    parser.add_argument(
+        "output",
+        help=(
+            "output file with extension (e.g., figure.png)."
+            " If not provided, the plot will be opened in a new window."
+        ),
+        nargs="?",
+        metavar="output_file",
     )
 
 
@@ -341,6 +350,7 @@ def add_plot_title_argument(parser):
     )
 
 
+<<<<<<< HEAD
 <<<<<<< HEAD
 <<<<<<< HEAD
 =======
@@ -367,6 +377,18 @@ def add_plot_title_argument(parser):
 >>>>>>> osgeo-main
 =======
 >>>>>>> osgeo-main
+=======
+def add_plot_metric_argument(parser):
+    """Add metric argument to *parser*."""
+    parser.add_argument(
+        "--metric",
+        help="Metric for the plot (default: time)",
+        default="time",
+        choices=["time", "speedup", "efficiency"],
+    )
+
+
+>>>>>>> c55184d3f6 (grass.benchmark: Compute speedup and enable plotting speedup/efficiency (#3835))
 def add_plot_subcommand(parent_subparsers):
     """Add plot subcommand."""
     main_parser = add_subcommand_parser(
@@ -459,6 +481,7 @@ def add_plot_subcommand(parent_subparsers):
     )
     add_plot_io_arguments(nprocs)
     add_plot_title_argument(nprocs)
+    add_plot_metric_argument(nprocs)
     nprocs.set_defaults(handler=plot_nprocs_cli)
 
 <<<<<<< HEAD
@@ -490,6 +513,7 @@ def define_arguments():
     parser = argparse.ArgumentParser(
         description="Process results from module benchmarks.",
         prog=get_executable_name(),
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
     subparsers = add_subparsers(parser, dest="command")
 

diff --git a/python/grass/benchmark/plots.py b/python/grass/benchmark/plots.py
@@ -40,6 +40,7 @@ def get_pyplot(to_file):
 <<<<<<< HEAD
 <<<<<<< HEAD
 <<<<<<< HEAD
+<<<<<<< HEAD
 =======
 <<<<<<< HEAD
 =======
@@ -60,6 +61,9 @@ def get_pyplot(to_file):
 =======
 >>>>>>> osgeo-main
 def nprocs_plot(results, filename=None, title=None):
+=======
+def nprocs_plot(results, filename=None, title=None, metric="time"):
+>>>>>>> c55184d3f6 (grass.benchmark: Compute speedup and enable plotting speedup/efficiency (#3835))
     """Plot results from a multiple nprocs (thread) benchmarks.
 
     *results* is a list of individual results from separate benchmarks.
@@ -98,24 +102,37 @@ def nprocs_plot(results, filename=None):
     from the *nprocs* list.
     The *label* attribute identifies the benchmark in the legend.
 
+    *metric* can be "time", "speedup", or "efficiency".
+    This function plots a corresponding figure based on the chosen metric.
+
     Optionally, result can have an *all_times* attribute which is a list
     of lists. One sublist is all times recorded for each value of nprocs.
 
     Each result can come with a different list of nprocs, i.e., benchmarks
     which used different values for nprocs can be combined in one plot.
     """
+    ylabel = ""
     plt = get_pyplot(to_file=bool(filename))
-    axes = plt.gca()
+    _, axes = plt.subplots()
 
     x_ticks = set()  # gather x values
     for result in results:
         x = result.nprocs
         x_ticks.update(x)
-        plt.plot(x, result.times, label=result.label)
-        if hasattr(result, "all_times"):
+        if metric == "time":
             mins = [min(i) for i in result.all_times]
             maxes = [max(i) for i in result.all_times]
+            plt.plot(x, result.times, label=result.label)
             plt.fill_between(x, mins, maxes, color="gray", alpha=0.3)
+            ylabel = "Time [s]"
+        elif metric in ["speedup", "efficiency"]:
+            ylabel = metric.title()
+            plt.plot(x, getattr(result, metric), label=result.label)
+        else:
+            raise ValueError(
+                f"Invalid metric '{metric}' in result, it should be:\
+                'time', 'speedup' or 'efficiency'"
+            )
     plt.legend()
 <<<<<<< HEAD
 <<<<<<< HEAD
@@ -172,12 +189,13 @@ def nprocs_plot(results, filename=None):
 
         axes.xaxis.set_major_locator(MaxNLocator(integer=True))
     plt.xlabel("Number of processing elements (cores, threads, processes)")
-    plt.ylabel("Time [s]")
+    plt.ylabel(ylabel)
     if title:
         plt.title(title)
-    else:
+    elif metric == "times":
         plt.title("Execution time by processing elements")
 <<<<<<< HEAD
+<<<<<<< HEAD
 =======
 <<<<<<< HEAD
 <<<<<<< HEAD
@@ -208,6 +226,10 @@ def nprocs_plot(results, filename=None):
 >>>>>>> osgeo-main
 =======
 >>>>>>> osgeo-main
+=======
+    elif metric in ["speedup", "efficiency"]:
+        plt.title(f"{metric.title()} by processing elements")
+>>>>>>> c55184d3f6 (grass.benchmark: Compute speedup and enable plotting speedup/efficiency (#3835))
     if filename:
         plt.savefig(filename)
     else:

diff --git a/python/grass/benchmark/runners.py b/python/grass/benchmark/runners.py
@@ -247,11 +247,10 @@ def benchmark_nprocs(module, label, max_nprocs, repeat=5):
     min_avg = float("inf")
     min_time = None
     serial_avg = None
-    avg_times = []
-    all_times = []
-    efficiency = []
-    nprocs_list = list(range(1, max_nprocs + 1))
-    nprocs_list_shuffled = sorted(nprocs_list * repeat)
+    result = SimpleNamespace(times=[], all_times=[], speedup=[], efficiency=[])
+    result.nprocs = list(range(1, max_nprocs + 1))
+    result.label = label
+    nprocs_list_shuffled = sorted(result.nprocs * repeat)
     if shuffle:
         random.shuffle(nprocs_list_shuffled)
     times = {}
@@ -266,6 +265,7 @@ def benchmark_nprocs(module, label, max_nprocs, repeat=5):
             times[nprocs] = [module.time]
     for nprocs in sorted(times):
         avg = sum(times[nprocs]) / repeat
+<<<<<<< HEAD
         avg_times.append(avg)
         all_times.append(times[nprocs])
 <<<<<<< HEAD
@@ -330,6 +330,10 @@ def benchmark_nprocs(module, label, max_nprocs, repeat=5):
 >>>>>>> osgeo-main
 =======
 >>>>>>> osgeo-main
+=======
+        result.times.append(avg)
+        result.all_times.append(times[nprocs])
+>>>>>>> c55184d3f6 (grass.benchmark: Compute speedup and enable plotting speedup/efficiency (#3835))
         if nprocs == 1:
             serial_avg = avg
         if avg < min_avg:
@@ -338,6 +342,7 @@ def benchmark_nprocs(module, label, max_nprocs, repeat=5):
 <<<<<<< HEAD
 <<<<<<< HEAD
 <<<<<<< HEAD
+<<<<<<< HEAD
 =======
 <<<<<<< HEAD
 =======
@@ -358,6 +363,10 @@ def benchmark_nprocs(module, label, max_nprocs, repeat=5):
 =======
 >>>>>>> osgeo-main
         efficiency.append(serial_avg / (nprocs * avg))
+=======
+        result.speedup.append(serial_avg / avg)
+        result.efficiency.append(serial_avg / (nprocs * avg))
+>>>>>>> c55184d3f6 (grass.benchmark: Compute speedup and enable plotting speedup/efficiency (#3835))
 
     print("\u2500" * term_size.columns)
     if serial_avg is not None:
@@ -393,6 +402,7 @@ def benchmark_nprocs(module, label, max_nprocs, repeat=5):
 >>>>>>> osgeo-main
     print(f"Best average time - {min_avg}s ({min_time} threads)\n")
 
+<<<<<<< HEAD
     return SimpleNamespace(
         all_times=all_times,
         times=avg_times,
@@ -434,6 +444,9 @@ def benchmark_nprocs(module, label, max_nprocs, repeat=5):
         nprocs=nprocs_list,
         label=label,
     )
+=======
+    return result
+>>>>>>> c55184d3f6 (grass.benchmark: Compute speedup and enable plotting speedup/efficiency (#3835))
 
 
 def benchmark_resolutions(module, resolutions, label, repeat=5, nprocs=None):

diff --git a/python/grass/benchmark/testsuite/test_benchmark.py b/python/grass/benchmark/testsuite/test_benchmark.py
@@ -21,6 +21,10 @@
 <<<<<<< HEAD
 <<<<<<< HEAD
 <<<<<<< HEAD
+<<<<<<< HEAD
+=======
+    benchmark_nprocs,
+>>>>>>> c55184d3f6 (grass.benchmark: Compute speedup and enable plotting speedup/efficiency (#3835))
     benchmark_single,
 =======
 <<<<<<< HEAD
@@ -138,6 +142,7 @@ def test_single(self):
             self.assertEqual(len(result.all_times), repeat)
         self.assertEqual(results[0].label, label)
 
+<<<<<<< HEAD
 <<<<<<< HEAD
 <<<<<<< HEAD
 =======
@@ -161,6 +166,32 @@ def test_single(self):
 >>>>>>> osgeo-main
 =======
 >>>>>>> osgeo-main
+=======
+    def test_nprocs(self):
+        """Test that benchmark function runs for nprocs"""
+        label = "Standard output"
+        repeat = 4
+        benchmarks = [
+            dict(
+                module=Module("r.univar", map="elevation", stdout_=DEVNULL, run_=False),
+                label=label,
+                max_nprocs=4,
+            )
+        ]
+        results = []
+        for benchmark in benchmarks:
+            results.append(benchmark_nprocs(**benchmark, repeat=repeat, shuffle=True))
+        self.assertEqual(len(results), len(benchmarks))
+        for result in results:
+            self.assertTrue(hasattr(result, "times"))
+            self.assertTrue(hasattr(result, "all_times"))
+            self.assertTrue(hasattr(result, "speedup"))
+            self.assertTrue(hasattr(result, "efficiency"))
+            self.assertTrue(hasattr(result, "label"))
+            self.assertEqual(len(result.all_times), repeat)
+        self.assertEqual(results[0].label, label)
+
+>>>>>>> c55184d3f6 (grass.benchmark: Compute speedup and enable plotting speedup/efficiency (#3835))
 
 class TestBenchmarkResults(TestCase):
     """Tests that saving results work"""

diff --git a/python/grass/benchmark/testsuite/test_benchmark_cli.py b/python/grass/benchmark/testsuite/test_benchmark_cli.py
@@ -138,14 +138,14 @@ class TestBenchmarkCLI(TestCase):
     """Tests that benchmarkin CLI works"""
 
     json_filename = "plot_test.json"
-    png_filename1 = "plot_test1.png"
-    png_filename2 = "plot_test2.png"
+    png_filenames = [f"plot_test1_{i}.png" for i in range(4)]
+    png_filenames.append("plot_test2.png")
 
     def tearDown(self):
         """Remove test files"""
         remove_file(self.json_filename)
-        remove_file(self.png_filename1)
-        remove_file(self.png_filename2)
+        for filename in self.png_filenames:
+            remove_file(filename)
 
     def test_plot_nprocs_workflow(self):
         """Test that plot nprocs workflow runs"""
@@ -162,8 +162,15 @@ def test_plot_nprocs_workflow(self):
         except grass.exceptions.ParameterError:
             self.skipTest("r.univar without nprocs parameter")
         save_results_to_file([result], self.json_filename)
-        benchmark_main(["plot", "nprocs", self.json_filename, self.png_filename1])
-        self.assertTrue(Path(self.png_filename1).is_file())
+
+        metrics = ["time", "speedup", "efficiency"]
+        benchmark_main(["plot", "nprocs", self.json_filename, self.png_filenames[0]])
+        for png_fname, metric in zip(self.png_filenames[1:4], metrics):
+            benchmark_main(
+                ["plot", "nprocs", "--metric", metric, self.json_filename, png_fname]
+            )
+        for filename in self.png_filenames[:4]:
+            self.assertTrue(Path(filename).is_file())
 
     def test_plot_cells_workflow(self):
         """Test that plot cells workflow runs"""
@@ -260,11 +267,18 @@ def test_plot_cells_workflow(self):
 >>>>>>> 8422103f4c (wxpyimgview: explicit conversion to int (#2704))
 >>>>>>> osgeo-main
         save_results_to_file([result], self.json_filename)
-        benchmark_main(["plot", "cells", self.json_filename, self.png_filename1])
-        self.assertTrue(Path(self.png_filename1).is_file())
+        benchmark_main(["plot", "cells", self.json_filename, self.png_filenames[0]])
+        self.assertTrue(Path(self.png_filenames[0]).is_file())
         benchmark_main(
-            ["plot", "cells", "--resolutions", self.json_filename, self.png_filename2]
+            [
+                "plot",
+                "cells",
+                "--resolutions",
+                self.json_filename,
+                self.png_filenames[-1],
+            ]
         )
+<<<<<<< HEAD
         self.assertTrue(Path(self.png_filename2).is_file())
 <<<<<<< HEAD
 =======
@@ -302,6 +316,9 @@ def test_plot_cells_workflow(self):
 >>>>>>> osgeo-main
 =======
 >>>>>>> osgeo-main
+=======
+        self.assertTrue(Path(self.png_filenames[-1]).is_file())
+>>>>>>> c55184d3f6 (grass.benchmark: Compute speedup and enable plotting speedup/efficiency (#3835))
 
 
 if __name__ == "__main__":