From 39af79bfdfde17aa1fda8004f44063771b9d4cdf Mon Sep 17 00:00:00 2001 From: Kriti Birda Date: Fri, 7 Jun 2024 01:51:15 +0530 Subject: [PATCH 1/3] Add JSON support to r.univar Use parson to add json output format support to the r.univar module. --- raster/r.univar/Makefile | 4 +- raster/r.univar/globals.h | 6 +- raster/r.univar/r.univar.html | 29 +++++ raster/r.univar/r.univar_main.c | 14 ++- raster/r.univar/r3.univar_main.c | 14 ++- raster/r.univar/stats.c | 130 +++++++++++++++++---- raster/r.univar/testsuite/test_r_univar.py | 55 ++++++++- 7 files changed, 217 insertions(+), 35 deletions(-) diff --git a/raster/r.univar/Makefile b/raster/r.univar/Makefile index f14d55967db..1513d3496a8 100644 --- a/raster/r.univar/Makefile +++ b/raster/r.univar/Makefile @@ -1,8 +1,8 @@ MODULE_TOPDIR = ../.. -LIBES2 = $(RASTERLIB) $(GISLIB) $(MATHLIB) $(OPENMP_LIBPATH) $(OPENMP_LIB) -LIBES3 = $(RASTER3DLIB) $(RASTERLIB) $(GISLIB) $(MATHLIB) $(OPENMP_LIBPATH) $(OPENMP_LIB) +LIBES2 = $(RASTERLIB) $(GISLIB) $(MATHLIB) $(OPENMP_LIBPATH) $(OPENMP_LIB) $(PARSONLIB) +LIBES3 = $(RASTER3DLIB) $(RASTERLIB) $(GISLIB) $(MATHLIB) $(OPENMP_LIBPATH) $(OPENMP_LIB) $(PARSONLIB) DEPENDENCIES = $(RASTER3DDEP) $(GISDEP) $(RASTERDEP) EXTRA_CFLAGS = $(OPENMP_CFLAGS) EXTRA_INC = $(OPENMP_INCPATH) diff --git a/raster/r.univar/globals.h b/raster/r.univar/globals.h index 38599361109..a2bbc184f4a 100644 --- a/raster/r.univar/globals.h +++ b/raster/r.univar/globals.h @@ -52,18 +52,20 @@ typedef struct { /* command line options are the same for raster and raster3d maps */ typedef struct { struct Option *inputfile, *zonefile, *percentile, *output_file, *separator, - *nprocs; + *nprocs, *format; struct Flag *shell_style, *extended, *table, *use_rast_region; } param_type; extern param_type param; extern zone_type zone_info; +enum OutputFormat { PLAIN, JSON }; + /* fn prototypes */ void heapsort_double(double *data, size_t n); void heapsort_float(float *data, size_t n); void heapsort_int(int *data, size_t n); -int 
print_stats(univar_stat *stats); +int print_stats(univar_stat *stats, enum OutputFormat format); int print_stats_table(univar_stat *stats); univar_stat *create_univar_stat_struct(int map_type, int n_perc); void free_univar_stat_struct(univar_stat *stats); diff --git a/raster/r.univar/r.univar.html b/raster/r.univar/r.univar.html index eaedaa0aae8..90e0dc902ad 100644 --- a/raster/r.univar/r.univar.html +++ b/raster/r.univar/r.univar.html @@ -238,6 +238,35 @@

Zonal statistics

dataset) viewed through Libre/Open Office Calc. +

JSON Output

+
+r.univar -e elevation percentile=98,9 format=json
+
+will output the results in JSON format: + +
+[
+    {
+        "n": 2025000,
+        "null_cells": 0,
+        "cells": 2025000,
+        "min": 55.578792572021484,
+        "max": 156.32986450195312,
+        "range": 100.75107192993164,
+        "mean": 110.37544027560575,
+        "mean_of_abs": 110.37544027560575,
+        "stddev": 20.315323320598083,
+        "variance": 412.7123616204363,
+        "coeff_var": 18.40565552433679,
+        "sum": 223510266.55810165,
+        "first_quartile": 94.789985656738281,
+        "median": 108.87990570068359,
+        "third_quartile": 126.79196929931641,
+        "percentile_98": 147.7265625
+    }
+]
+
+

TODO

To be implemented mode, skewness, kurtosis. diff --git a/raster/r.univar/r.univar_main.c b/raster/r.univar/r.univar_main.c index 3c070ca827d..84550a87347 100644 --- a/raster/r.univar/r.univar_main.c +++ b/raster/r.univar/r.univar_main.c @@ -110,6 +110,9 @@ void set_params(void) _("Table output format instead of standard output format"); param.table->guisection = _("Formatting"); + param.format = G_define_standard_option(G_OPT_F_FORMAT); + param.format->guisection = _("Print"); + param.use_rast_region = G_define_flag(); param.use_rast_region->key = 'r'; param.use_rast_region->description = @@ -140,6 +143,8 @@ int main(int argc, char *argv[]) const char *mapset, *name; int t; + enum OutputFormat format; + G_gisinit(argv[0]); module = G_define_module(); @@ -175,6 +180,13 @@ int main(int argc, char *argv[]) } } + if (strcmp(param.format->answer, "json") == 0) { + format = JSON; + } + else { + format = PLAIN; + } + /* set nprocs parameter */ int nprocs; sscanf(param.nprocs->answer, "%d", &nprocs); @@ -283,7 +295,7 @@ int main(int argc, char *argv[]) if (param.table->answer) print_stats_table(stats); else - print_stats(stats); + print_stats(stats, format); /* release memory */ free_univar_stat_struct(stats); diff --git a/raster/r.univar/r3.univar_main.c b/raster/r.univar/r3.univar_main.c index 5ad71240bbd..28fdb69acd5 100644 --- a/raster/r.univar/r3.univar_main.c +++ b/raster/r.univar/r3.univar_main.c @@ -66,6 +66,9 @@ void set_params(void) param.table->description = _("Table output format instead of standard output format"); + param.format = G_define_standard_option(G_OPT_F_FORMAT); + param.format->guisection = _("Print"); + return; } @@ -91,6 +94,8 @@ int main(int argc, char *argv[]) struct GModule *module; + enum OutputFormat format; + G_gisinit(argv[0]); module = G_define_module(); @@ -129,6 +134,13 @@ int main(int argc, char *argv[]) } } + if (strcmp(param.format->answer, "json") == 0) { + format = JSON; + } + else { + format = PLAIN; + } + /* table field separator 
*/ zone_info.sep = G_option_to_separator(param.separator); @@ -318,7 +330,7 @@ int main(int argc, char *argv[]) if (param.table->answer) print_stats_table(stats); else - print_stats(stats); + print_stats(stats, format); /* release memory */ free_univar_stat_struct(stats); diff --git a/raster/r.univar/stats.c b/raster/r.univar/stats.c index efdcbe22f57..ecb4c294a2a 100644 --- a/raster/r.univar/stats.c +++ b/raster/r.univar/stats.c @@ -11,6 +11,7 @@ * */ +#include <grass/parson.h> #include "globals.h" /* *************************************************************** */ @@ -81,8 +82,20 @@ void free_univar_stat_struct(univar_stat *stats) /* *************************************************************** */ /* **** compute and print univar statistics to stdout ************ */ /* *************************************************************** */ -int print_stats(univar_stat *stats) +int print_stats(univar_stat *stats, enum OutputFormat format) { + JSON_Value *root_value, *zone_value; + JSON_Array *root_array; + JSON_Object *zone_object; + + if (format == JSON) { + root_value = json_value_init_array(); + if (root_value == NULL) { + G_fatal_error(_("Failed to initialize JSON array. 
Out of memory?")); + } + root_array = json_array(root_value); + } + int z, n_zones = zone_info.n_zones; if (n_zones == 0) @@ -117,7 +130,7 @@ int print_stats(univar_stat *stats) sprintf(sum_str, "%.15g", stats[z].sum); G_trim_decimal(sum_str); - if (!param.shell_style->answer) { + if (!param.shell_style->answer && format == PLAIN) { if (zone_info.n_zones) { int z_cat = z + zone_info.min; @@ -131,26 +144,61 @@ int print_stats(univar_stat *stats) fprintf(stdout, "Of the non-null cells:\n----------------------\n"); } - if (param.shell_style->answer) { + if (param.shell_style->answer || format == JSON) { + if (format == JSON) { + zone_value = json_value_init_object(); + zone_object = json_object(zone_value); + } if (zone_info.n_zones) { int z_cat = z + zone_info.min; - fprintf(stdout, "zone=%d;%s\n", z_cat, + switch (format) { + case PLAIN: + fprintf(stdout, "zone=%d;%s\n", z_cat, + Rast_get_c_cat(&z_cat, &(zone_info.cats))); + break; + case JSON: + json_object_set_number(zone_object, "zone_number", z_cat); + json_object_set_string( + zone_object, "zone_category", Rast_get_c_cat(&z_cat, &(zone_info.cats))); + break; + } + } + switch (format) { + case PLAIN: + fprintf(stdout, "n=%lu\n", stats[z].n); + fprintf(stdout, "null_cells=%lu\n", stats[z].size - stats[z].n); + fprintf(stdout, "cells=%lu\n", stats[z].size); + fprintf(stdout, "min=%.15g\n", stats[z].min); + fprintf(stdout, "max=%.15g\n", stats[z].max); + fprintf(stdout, "range=%.15g\n", stats[z].max - stats[z].min); + fprintf(stdout, "mean=%.15g\n", mean); + fprintf(stdout, "mean_of_abs=%.15g\n", + stats[z].sum_abs / stats[z].n); + fprintf(stdout, "stddev=%.15g\n", stdev); + fprintf(stdout, "variance=%.15g\n", variance); + fprintf(stdout, "coeff_var=%.15g\n", var_coef); + fprintf(stdout, "sum=%s\n", sum_str); + break; + case JSON: + json_object_set_number(zone_object, "n", stats[z].n); + json_object_set_number(zone_object, "null_cells", + stats[z].size - stats[z].n); + json_object_set_number(zone_object, "cells", 
stats[z].size); + json_object_set_number(zone_object, "min", stats[z].min); + json_object_set_number(zone_object, "max", stats[z].max); + json_object_set_number(zone_object, "range", + stats[z].max - stats[z].min); + json_object_set_number(zone_object, "mean", mean); + json_object_set_number(zone_object, "mean_of_abs", + stats[z].sum_abs / stats[z].n); + json_object_set_number(zone_object, "stddev", stdev); + json_object_set_number(zone_object, "variance", variance); + json_object_set_number(zone_object, "coeff_var", var_coef); + json_object_set_number(zone_object, "sum", stats[z].sum); + break; } - fprintf(stdout, "n=%lu\n", stats[z].n); - fprintf(stdout, "null_cells=%lu\n", stats[z].size - stats[z].n); - fprintf(stdout, "cells=%lu\n", stats[z].size); - fprintf(stdout, "min=%.15g\n", stats[z].min); - fprintf(stdout, "max=%.15g\n", stats[z].max); - fprintf(stdout, "range=%.15g\n", stats[z].max - stats[z].min); - fprintf(stdout, "mean=%.15g\n", mean); - fprintf(stdout, "mean_of_abs=%.15g\n", - stats[z].sum_abs / stats[z].n); - fprintf(stdout, "stddev=%.15g\n", stdev); - fprintf(stdout, "variance=%.15g\n", variance); - fprintf(stdout, "coeff_var=%.15g\n", var_coef); - fprintf(stdout, "sum=%s\n", sum_str); } else { fprintf(stdout, "n: %lu\n", stats[z].n); @@ -244,17 +292,38 @@ int print_stats(univar_stat *stats) } } - if (param.shell_style->answer) { - fprintf(stdout, "first_quartile=%g\n", quartile_25); - fprintf(stdout, "median=%g\n", median); - fprintf(stdout, "third_quartile=%g\n", quartile_75); + if (param.shell_style->answer || format == JSON) { + switch (format) { + case PLAIN: + fprintf(stdout, "first_quartile=%g\n", quartile_25); + fprintf(stdout, "median=%g\n", median); + fprintf(stdout, "third_quartile=%g\n", quartile_75); + break; + case JSON: + json_object_set_number(zone_object, "first_quartile", + quartile_25); + json_object_set_number(zone_object, "median", median); + json_object_set_number(zone_object, "third_quartile", + quartile_75); + break; + } + 
for (i = 0; i < stats[z].n_perc; i++) { - char buf[24]; + char buf[24], buf2[36]; sprintf(buf, "%.15g", stats[z].perc[i]); G_strchg(buf, '.', '_'); - fprintf(stdout, "percentile_%s=%g\n", buf, - quartile_perc[i]); + switch (format) { + case PLAIN: + fprintf(stdout, "percentile_%s=%g\n", buf, + quartile_perc[i]); + break; + case JSON: + snprintf(buf2, 36, "percentile_%s", buf); + json_object_set_number(zone_object, buf2, + quartile_perc[i]); + break; + } } } else { @@ -301,6 +370,19 @@ int print_stats(univar_stat *stats) * above with zone */ /* if (!(param.shell_style->answer)) G_message("\n"); */ + if (format == JSON) { + json_array_append_value(root_array, zone_value); + } + } + + if (format == JSON) { + char *serialized_string = json_serialize_to_string_pretty(root_value); + if (serialized_string == NULL) { + G_fatal_error(_("Failed to initialize pretty JSON string.")); + } + puts(serialized_string); + json_free_serialized_string(serialized_string); + json_value_free(root_value); } return 1; diff --git a/raster/r.univar/testsuite/test_r_univar.py b/raster/r.univar/testsuite/test_r_univar.py index c03aea5dcfa..249df71f4d1 100644 --- a/raster/r.univar/testsuite/test_r_univar.py +++ b/raster/r.univar/testsuite/test_r_univar.py @@ -3,8 +3,12 @@ @author Soeren Gebbert """ +import json + from grass.gunittest.case import TestCase +from grass.gunittest.gmodules import SimpleModule + class TestRasterUnivar(TestCase): @classmethod @@ -562,11 +566,52 @@ def test_zone_with_gap_in_cats(self): sep="=", ) - -class TestAccumulateFails(TestCase): - def test_error_handling(self): - # No vector map, no strds, no coordinates - self.assertModuleFail("r.univar", flags="r", map="map_a", zones="map_b") + def test_json(self): + reference = [ + { + "zone_number": 1, + "zone_category": "", + "n": 3420, + "null_cells": 0, + "cells": 3420, + "min": 102, + "max": 309, + "range": 207, + "mean": 205.5, + "mean_of_abs": 205.5, + "stddev": 56.611983419296187, + "variance": 3204.9166666666665, + 
"coeff_var": 27.548410423015174, + "sum": 702810, + }, + { + "zone_number": 2, + "zone_category": "", + "n": 12780, + "null_cells": 0, + "cells": 12780, + "min": 121, + "max": 380, + "range": 259, + "mean": 250.5, + "mean_of_abs": 250.5, + "stddev": 59.957623924457401, + "variance": 3594.9166666666665, + "coeff_var": 23.935179211360243, + "sum": 3201390, + }, + ] + + module = SimpleModule( + "r.univar", + map=["map_a", "map_b"], + zones="zone_map", + flags="g", + format="json", + ) + self.runModule(module) + expected = json.loads(module.outputs.stdout) + self.assertListEqual(reference, expected) if __name__ == "__main__": From a753ab29d5cc9a87faa3685a337a657e4d4ba3c6 Mon Sep 17 00:00:00 2001 From: Kriti Birda Date: Tue, 2 Jul 2024 18:21:14 +0530 Subject: [PATCH 2/3] address pr feedback --- raster/r.univar/r.univar.html | 11 ++++++++++- raster/r.univar/stats.c | 23 ++++++++++++++++++++-- raster/r.univar/testsuite/test_r_univar.py | 21 +++++++++++++++++--- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/raster/r.univar/r.univar.html b/raster/r.univar/r.univar.html index 90e0dc902ad..5220bc64a44 100644 --- a/raster/r.univar/r.univar.html +++ b/raster/r.univar/r.univar.html @@ -262,7 +262,16 @@

JSON Output

"first_quartile": 94.789985656738281, "median": 108.87990570068359, "third_quartile": 126.79196929931641, - "percentile_98": 147.7265625 + "percentiles": [ + { + "percentile": 98, + "value": 147.7265625 + }, + { + "percentile": 9, + "value": 83.494270324707031 + } + ] } ] diff --git a/raster/r.univar/stats.c b/raster/r.univar/stats.c index ecb4c294a2a..649ec68d8d3 100644 --- a/raster/r.univar/stats.c +++ b/raster/r.univar/stats.c @@ -308,6 +308,15 @@ int print_stats(univar_stat *stats, enum OutputFormat format) break; } + JSON_Value *percentiles_array_value, *percentile_value; + JSON_Array *percentiles_array; + JSON_Object *percentile_object; + + if (format == JSON) { + percentiles_array_value = json_value_init_array(); + percentiles_array = json_array(percentiles_array_value); + } + for (i = 0; i < stats[z].n_perc; i++) { char buf[24], buf2[36]; @@ -319,12 +328,22 @@ int print_stats(univar_stat *stats, enum OutputFormat format) quartile_perc[i]); break; case JSON: - snprintf(buf2, 36, "percentile_%s", buf); - json_object_set_number(zone_object, buf2, + percentile_value = json_value_init_object(); + percentile_object = json_object(percentile_value); + json_object_set_number(percentile_object, "percentile", + stats[z].perc[i]); + json_object_set_number(percentile_object, "value", quartile_perc[i]); + json_array_append_value(percentiles_array, + percentile_value); break; } } + + if (format == JSON) { + json_object_set_value(zone_object, "percentiles", + percentiles_array_value); + } } else { fprintf(stdout, "1st quartile: %g\n", quartile_25); diff --git a/raster/r.univar/testsuite/test_r_univar.py b/raster/r.univar/testsuite/test_r_univar.py index 249df71f4d1..de28f0753d5 100644 --- a/raster/r.univar/testsuite/test_r_univar.py +++ b/raster/r.univar/testsuite/test_r_univar.py @@ -4,6 +4,7 @@ """ import json +from itertools import zip_longest from grass.gunittest.case import TestCase @@ -583,6 +584,10 @@ def test_json(self): "variance": 3204.9166666666665, "coeff_var": 
27.548410423015174, "sum": 702810, + "first_quartile": 155, + "median": 205.5, + "percentiles": [{"percentile": 90, "value": 282}], + "third_quartile": 255, }, { "zone_number": 2, @@ -599,6 +604,10 @@ def test_json(self): "variance": 3594.9166666666665, "coeff_var": 23.935179211360243, "sum": 3201390, + "first_quartile": 200, + "median": 250.5, + "percentiles": [{"percentile": 90, "value": 330}], + "third_quartile": 300, }, ] @@ -606,12 +615,18 @@ def test_json(self): "r.univar", map=["map_a", "map_b"], zones="zone_map", - flags="g", + flags="ge", format="json", ) self.runModule(module) - expected = json.loads(module.outputs.stdout) - self.assertListEqual(reference, expected) + output = json.loads(module.outputs.stdout) + for expected, received in zip_longest(reference, output): + self.assertCountEqual(list(expected.keys()), list(received.keys())) + for key in expected: + if isinstance(expected[key], float): + self.assertAlmostEqual(expected[key], received[key], places=6) + else: + self.assertEqual(expected[key], received[key]) if __name__ == "__main__": From 93fbc955d1685102a2a19bda07606272942d5d6e Mon Sep 17 00:00:00 2001 From: Kriti Birda <164247895+kritibirda26@users.noreply.github.com> Date: Tue, 2 Jul 2024 19:17:41 +0530 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Nicklas Larsson --- raster/r.univar/stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/raster/r.univar/stats.c b/raster/r.univar/stats.c index 649ec68d8d3..97367976103 100644 --- a/raster/r.univar/stats.c +++ b/raster/r.univar/stats.c @@ -318,9 +318,9 @@ int print_stats(univar_stat *stats, enum OutputFormat format) } for (i = 0; i < stats[z].n_perc; i++) { - char buf[24], buf2[36]; + char buf[24]; - sprintf(buf, "%.15g", stats[z].perc[i]); + snprintf(buf, sizeof(buf), "%.15g", stats[z].perc[i]); G_strchg(buf, '.', '_'); switch (format) { case PLAIN: