diff --git a/raster/r.patch/Makefile b/raster/r.patch/Makefile index 66fb20161e2..a44cd2e0a41 100644 --- a/raster/r.patch/Makefile +++ b/raster/r.patch/Makefile @@ -2,8 +2,9 @@ MODULE_TOPDIR = ../.. PGM = r.patch -LIBES = $(RASTERLIB) $(GISLIB) +LIBES = $(RASTERLIB) $(GISLIB) $(OMPLIB) DEPENDENCIES = $(RASTERDEP) $(GISDEP) +EXTRA_CFLAGS = $(OMPCFLAGS) include $(MODULE_TOPDIR)/include/Make/Module.make diff --git a/raster/r.patch/benchmark/benchmark_r_patch.py b/raster/r.patch/benchmark/benchmark_r_patch.py new file mode 100644 index 00000000000..524f6044add --- /dev/null +++ b/raster/r.patch/benchmark/benchmark_r_patch.py @@ -0,0 +1,82 @@ +"""Benchmarking of r.patch + +@author Aaron Saw Min Sern +""" + +from grass.exceptions import CalledModuleError +from grass.pygrass.modules import Module +from subprocess import DEVNULL + +import grass.benchmark as bm + + +def main(): + results = [] + + # Users can add more or modify existing reference maps + benchmark(7071, "r.patch_50M", results) + benchmark(14142, "r.patch_200M", results) + benchmark(10000, "r.patch_100M", results) + benchmark(20000, "r.patch_400M", results) + + bm.nprocs_plot(results, filename="rpatch_benchmark.svg") + + +def benchmark(size, label, results): + references = ["r_patch_reference_map_{}".format(i) for i in range(4)] + output = "benchmark_r_patch" + + generate_map(rows=size, cols=size, fname=references) + module = Module( + "r.patch", + input=references, + output=output, + run_=False, + stdout_=DEVNULL, + overwrite=True, + ) + results.append(bm.benchmark_nprocs(module, label=label, max_nprocs=24, repeat=3)) + for r in references: + Module("g.remove", quiet=True, flags="f", type="raster", name=r) + Module("g.remove", quiet=True, flags="f", type="raster", name=output) + + +def generate_map(rows, cols, fname): + temp = "r_patch_reference_tmp" + + Module("g.region", flags="p", s=0, n=rows, w=0, e=cols, res=1) + # Generate using r.random.surface if r.surf.fractal fails + try: + print("Generating reference map using r.surf.fractal...") + for r in fname: + Module( + "r.surf.fractal", + output=temp, + overwrite=True, + ) + # Make approximate half of the reference map to be null + Module( + "r.mapcalc", + expression=f"{r} = if({temp}, {temp}, 0, null())", + overwrite=True, + ) + except CalledModuleError: + print("r.surf.fractal fails, using r.random.surface instead...") + for r in fname: + Module( + "r.random.surface", + maximum=255, + output=temp, + overwrite=True, + ) + Module( + "r.mapcalc", + expression=f"{r} = if({temp} - 128, {temp}, 0, null())", + overwrite=True, + ) + + Module("g.remove", quiet=True, flags="f", type="raster", name=temp) + + +if __name__ == "__main__": + main() diff --git a/raster/r.patch/benchmark/benchmark_r_patch_memory.py b/raster/r.patch/benchmark/benchmark_r_patch_memory.py new file mode 100644 index 00000000000..98df6f8ce56 --- /dev/null +++ b/raster/r.patch/benchmark/benchmark_r_patch_memory.py @@ -0,0 +1,84 @@ +"""Benchmarking of r.patch memory parameter + +@author Aaron Saw Min Sern +@author Anna Petrasova +""" + +from grass.exceptions import CalledModuleError +from grass.pygrass.modules import Module +from subprocess import DEVNULL + +import grass.benchmark as bm + + +def main(): + results = [] + + references = ["r_patch_reference_map_{}".format(i) for i in range(4)] + generate_map(rows=5000, cols=5000, fname=references) + # Users can add more or modify existing reference maps + benchmark(0, "r.patch_0MB", references, results) + benchmark(5, "r.patch_5MB", references, results) + benchmark(10, "r.patch_10MB", references, results) + benchmark(100, "r.patch_100MB", references, results) + benchmark(300, "r.patch_300MB", references, results) + for r in references: + Module("g.remove", quiet=True, flags="f", type="raster", name=r) + bm.nprocs_plot(results, filename="rpatch_benchmark_memory.svg") + + +def benchmark(memory, label, references, results): + output = "benchmark_r_patch" + + module = Module( + "r.patch", + input=references, + output=output, + memory=memory, + run_=False, + stdout_=DEVNULL, + overwrite=True, + ) + results.append(bm.benchmark_nprocs(module, label=label, max_nprocs=24, repeat=10)) + Module("g.remove", quiet=True, flags="f", type="raster", name=output) + + +def generate_map(rows, cols, fname): + temp = "r_patch_reference_tmp" + + Module("g.region", flags="p", s=0, n=rows, w=0, e=cols, res=1) + # Generate using r.random.surface if r.surf.fractal fails + try: + print("Generating reference map using r.surf.fractal...") + for r in fname: + Module( + "r.surf.fractal", + output=temp, + overwrite=True, + ) + # Make approximate half of the reference map to be null + Module( + "r.mapcalc", + expression=f"{r} = if({temp}, {temp}, 0, null())", + overwrite=True, + ) + except CalledModuleError: + print("r.surf.fractal fails, using r.random.surface instead...") + for r in fname: + Module( + "r.random.surface", + maximum=255, + output=temp, + overwrite=True, + ) + Module( + "r.mapcalc", + expression=f"{r} = if({temp} - 128, {temp}, 0, null())", + overwrite=True, + ) + + Module("g.remove", quiet=True, flags="f", type="raster", name=temp) + + +if __name__ == "__main__": + main() diff --git a/raster/r.patch/main.c b/raster/r.patch/main.c index 0aa07049650..b95fc882c8d 100644 --- a/raster/r.patch/main.c +++ b/raster/r.patch/main.c @@ -17,7 +17,11 @@ * for details. * *****************************************************************************/ +#if defined(_OPENMP) +#include +#endif #include +#include #include #include @@ -28,7 +32,7 @@ int main(int argc, char *argv[]) { - int *infd; + int **infd; struct Categories cats; struct Cell_stats *statf; struct Colors colr; @@ -38,10 +42,13 @@ int main(int argc, char *argv[]) RASTER_MAP_TYPE out_type, map_type; size_t out_cell_size; struct History history; - void *presult, *patch; + void **presult, **patch; + void *outbuf; + int bufrows; int nfiles; char *rname; - int i; + int i, t; + int nprocs; int row, nrows, ncols; int use_zero, no_support; char *new_name; @@ -52,7 +59,7 @@ int main(int argc, char *argv[]) struct GModule *module; struct Flag *zeroflag, *nosupportflag; - struct Option *opt1, *opt2; + struct Option *opt1, *opt2, *threads, *memory; G_gisinit(argv[0]); @@ -64,6 +71,7 @@ int main(int argc, char *argv[]) G_add_keyword(_("patching")); G_add_keyword(_("aggregation")); G_add_keyword(_("series")); + G_add_keyword(_("parallel")); module->description = _("Creates a composite raster map layer by using " "known category values from one (or more) map layer(s) " @@ -77,6 +85,9 @@ int main(int argc, char *argv[]) opt2 = G_define_standard_option(G_OPT_R_OUTPUT); opt2->description = _("Name for resultant raster map"); + threads = G_define_standard_option(G_OPT_M_NPROCS); + memory = G_define_standard_option(G_OPT_MEMORYMB); + /* Define the different flags */ zeroflag = G_define_flag(); @@ -90,6 +101,18 @@ int main(int argc, char *argv[]) if (G_parser(argc, argv)) exit(EXIT_FAILURE); + sscanf(threads->answer, "%d", &nprocs); + if (nprocs < 1) + G_fatal_error(_("<%d> is not valid number of nprocs."), nprocs); +#if defined(_OPENMP) + omp_set_num_threads(nprocs); +#else + if (nprocs != 1) + G_warning(_("GRASS is compiled without OpenMP support. Ignoring " + "threads setting.")); + nprocs = 1; +#endif + use_zero = (zeroflag->answer); no_support = (nosupportflag->answer); @@ -102,7 +125,9 @@ int main(int argc, char *argv[]) if (nfiles < 2) G_fatal_error(_("The minimum number of input raster maps is two")); - infd = G_malloc(nfiles * sizeof(int)); + infd = G_malloc(nprocs * sizeof(int*)); + for (t = 0; t < nprocs; t++) + infd[t] = G_malloc(nfiles * sizeof(int)); statf = G_malloc(nfiles * sizeof(struct Cell_stats)); cellhd = G_malloc(nfiles * sizeof(struct Cell_head)); @@ -110,9 +135,11 @@ int main(int argc, char *argv[]) const char *name = names[i]; int fd; - fd = Rast_open_old(name, ""); + for (t = 0; t < nprocs; t++) { + infd[t][i] = Rast_open_old(name, ""); + } - infd[i] = fd; + fd = infd[0][i]; map_type = Rast_get_map_type(fd); if (map_type == FCELL_TYPE && out_type == CELL_TYPE) @@ -130,46 +157,109 @@ int main(int argc, char *argv[]) rname = opt2->answer; outfd = Rast_open_new(new_name = rname, out_type); - presult = Rast_allocate_buf(out_type); - patch = Rast_allocate_buf(out_type); + presult = G_malloc(nprocs * sizeof *presult); + patch = G_malloc(nprocs * sizeof *patch); + for (t = 0; t < nprocs; t++) { + presult[t] = Rast_allocate_buf(out_type); + } + for (t = 0; t < nprocs; t++) { + patch[t] = Rast_allocate_buf(out_type); + } Rast_get_window(&window); nrows = Rast_window_rows(); ncols = Rast_window_cols(); + bufrows = atoi(memory->answer) * (((1 << 20) / out_cell_size) / ncols); + /* set the output buffer rows to be at most covering the entire map */ + if (bufrows > nrows) { + bufrows = nrows; + } + /* but at least the number of threads */ + if (bufrows < nprocs) { + bufrows = nprocs; + } + + outbuf = G_malloc(out_cell_size * ncols * bufrows); + G_verbose_message(_("Percent complete...")); - for (row = 0; row < nrows; row++) { - double north_edge, south_edge; - - G_percent(row, nrows, 2); - Rast_get_row(infd[0], presult, row, out_type); - - north_edge = Rast_row_to_northing(row, &window); - south_edge = north_edge - window.ns_res; - - if (out_type == CELL_TYPE) - Rast_update_cell_stats((CELL *) presult, ncols, &statf[0]); - for (i = 1; i < nfiles; i++) { - /* check if raster i overlaps with the current row */ - if (south_edge >= cellhd[i].north || - north_edge <= cellhd[i].south || - window.west >= cellhd[i].east || - window.east <= cellhd[i].west) - continue; - - Rast_get_row(infd[i], patch, row, out_type); - if (!do_patch(presult, patch, &statf[i], ncols, out_type, - out_cell_size, use_zero)) - break; + + int computed = 0; + int written = 0; + + while (written < nrows) { + int start = written; + int end = start + bufrows; + if (end > nrows) + end = nrows; + +#pragma omp parallel private(i) if(nprocs > 1) + { + int t_id = 0; +#if defined(_OPENMP) + t_id = omp_get_thread_num(); +#endif + void *local_presult = presult[t_id]; + void *local_patch = patch[t_id]; + int *local_infd = infd[t_id]; + +#pragma omp for schedule(static) + for (row = start; row < end; row++) { + double north_edge, south_edge; + + G_percent(computed, nrows, 2); + Rast_get_row(local_infd[0], local_presult, row, out_type); + + north_edge = Rast_row_to_northing(row, &window); + south_edge = north_edge - window.ns_res; + + if (out_type == CELL_TYPE) + Rast_update_cell_stats((CELL *) local_presult, ncols, + &statf[0]); + for (i = 1; i < nfiles; i++) { + /* check if raster i overlaps with the current row */ + if (south_edge >= cellhd[i].north || + north_edge <= cellhd[i].south || + window.west >= cellhd[i].east || + window.east <= cellhd[i].west) + continue; + + Rast_get_row(local_infd[i], local_patch, row, out_type); + if (!do_patch(local_presult, local_patch, &statf[i], ncols, + out_type, out_cell_size, use_zero)) + break; + } + void *p = G_incr_void_ptr(outbuf, out_cell_size * + (row - start) * ncols); + memcpy(p, local_presult, out_cell_size * ncols); + +#pragma omp atomic update + computed++; + } + + } /* end parallel region */ + + for (row = start; row < end; row++) { + void *p = + G_incr_void_ptr(outbuf, out_cell_size * (row - start) * ncols); + Rast_put_row(outfd, p, out_type); } - Rast_put_row(outfd, presult, out_type); - } - G_percent(row, nrows, 2); + written = end; + + } /* end while loop */ + G_percent(nrows, nrows, 2); + + for (t = 0; t < nprocs; t++) { + G_free(patch[t]); + G_free(presult[t]); + } G_free(patch); G_free(presult); - for (i = 0; i < nfiles; i++) - Rast_close(infd[i]); + + for (t = 0; t < nprocs; t++) + for (i = 0; i < nfiles; i++) + Rast_close(infd[t][i]); if (!no_support) { /* diff --git a/raster/r.patch/r.patch.html b/raster/r.patch/r.patch.html index 307c2c799ea..3997c066db0 100644 --- a/raster/r.patch/r.patch.html +++ b/raster/r.patch/r.patch.html @@ -161,6 +161,23 @@

NOTES

r.series can be used instead of r.patch. +

PERFORMANCE

+

By specifying the number of parallel processes with nprocs option, +r.patch can run significantly faster, see benchmarks below. + +

+ benchmark for number of cells + benchmark for memory size +
+ Figure: Benchmark on the left shows execution time for different + number of cells, benchmark on the right shows execution time + for different memory size for 5000x5000 raster. See benchmark scripts in source code. + (Intel Core i9-10940X CPU @ 3.30GHz x 28) +
+

To reduce the memory requirements to minimum, set option memory to zero. +To take advantage of the parallelization, GRASS GIS +needs to compiled with OpenMP enabled. +

EXAMPLES

Example with three maps

@@ -209,4 +226,6 @@

AUTHORS

Michael Shapiro, U.S. Army Construction Engineering Research Laboratory
--z flag and performance improvement by Huidae Cho +Huidae Cho (-z flag and performance improvement) +
+Aaron Saw Min Sern (OpenMP support). diff --git a/raster/r.patch/r_patch_benchmark_memory.png b/raster/r.patch/r_patch_benchmark_memory.png new file mode 100644 index 00000000000..8681fe515db Binary files /dev/null and b/raster/r.patch/r_patch_benchmark_memory.png differ diff --git a/raster/r.patch/r_patch_benchmark_size.png b/raster/r.patch/r_patch_benchmark_size.png new file mode 100644 index 00000000000..01a8d3b52a6 Binary files /dev/null and b/raster/r.patch/r_patch_benchmark_size.png differ diff --git a/raster/r.patch/testsuite/test_rpatch_artificial.py b/raster/r.patch/testsuite/test_rpatch_artificial.py index f01beb0e253..a8b14dcb2e4 100644 --- a/raster/r.patch/testsuite/test_rpatch_artificial.py +++ b/raster/r.patch/testsuite/test_rpatch_artificial.py @@ -128,6 +128,7 @@ class TestSmallDataNoOverlap(TestCase): cell_1 = "rpatch_small_test_cell_1" cell_2 = "rpatch_small_test_cell_2" cell_patched = "rpatch_small_test_cell_patched" + cell_patched_threaded = "rpatch_small_test_cell_patched_threaded" cell_patched_ref = "rpatch_small_test_cell_patched_ref" @classmethod @@ -158,7 +159,14 @@ def test_patching_cell(self): self.assertModule( "r.patch", input=(self.cell_1, self.cell_2), output=self.cell_patched ) + self.assertModule( + "r.patch", + input=(self.cell_1, self.cell_2), + nprocs=8, + output=self.cell_patched_threaded, + ) self.to_remove.append(self.cell_patched) + self.to_remove.append(self.cell_patched_threaded) self.runModule( "r.in.ascii", input="-", @@ -169,6 +177,9 @@ def test_patching_cell(self): self.assertRastersNoDifference( self.cell_patched, self.cell_patched_ref, precision=0 ) + self.assertRastersNoDifference( + self.cell_patched_threaded, self.cell_patched_ref, precision=0 + ) class TestSmallDataOverlap(TestCase): @@ -180,7 +191,9 @@ class TestSmallDataOverlap(TestCase): cell_ab = "rpatch_small_test_cell_ab_reference" cell_ba = "rpatch_small_test_cell_ba_reference" cell_ab_result = "rpatch_small_test_cell_ab_result" + cell_ab_result_threaded = "rpatch_small_test_cell_ab_result_threaded" cell_ba_result = "rpatch_small_test_cell_ba_result" + cell_ba_result_threaded = "rpatch_small_test_cell_ba_result_threaded" @classmethod def setUpClass(cls): @@ -221,9 +234,21 @@ def test_patch_oder_ab_cell(self): output=self.cell_ab_result, flags="z", ) + self.assertModule( + "r.patch", + input=(self.cell_a, self.cell_b), + output=self.cell_ab_result_threaded, + flags="z", + nprocs=8, + ) self.assertRasterExists(self.cell_ab_result) + self.assertRasterExists(self.cell_ab_result_threaded) self.to_remove.append(self.cell_ab_result) + self.to_remove.append(self.cell_ab_result_threaded) self.assertRastersNoDifference(self.cell_ab_result, self.cell_ab, precision=0) + self.assertRastersNoDifference( + self.cell_ab_result_threaded, self.cell_ab, precision=0 + ) def test_patch_oder_ba_cell(self): """Test patching two overlapping CELL raster maps (watching order)""" @@ -233,9 +258,21 @@ def test_patch_oder_ba_cell(self): output=self.cell_ba_result, flags="z", ) + self.assertModule( + "r.patch", + input=(self.cell_b, self.cell_a), + flags="z", + output=self.cell_ba_result_threaded, + nprocs=8, + ) self.assertRasterExists(self.cell_ba_result) + self.assertRasterExists(self.cell_ba_result_threaded) self.to_remove.append(self.cell_ba_result) + self.to_remove.append(self.cell_ba_result_threaded) self.assertRastersNoDifference(self.cell_ba_result, self.cell_ba, precision=0) + self.assertRastersNoDifference( + self.cell_ba_result_threaded, self.cell_ba, precision=0 + ) if __name__ == "__main__":