From 729a57c6f223f0bf854f5437788d095ca0bec876 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Feb 2024 15:46:29 -0700 Subject: [PATCH] Update develop-ref after #2823 (#2824) * Feature #2395 wdir (#2820) * Per #2395, add new columns to VL1L2, VAL1L2, and VCNT line types for wind direction statistics. Work still in progress. * Per #2395, write the new VCNT columns to the output and document the additions to the VL1L2, VAL1L2, and VCNT columns. * Per #2395, add the definition of new statistics to Appendix G. * Per #2395, update file version history. * Per #2395, tweak warning message about zero wind vectors and update grid-stat and point-stat to log calls to the do_vl1l2() function. * Per #2395, refine the weights for wind direction stats, ignoring the undefined directions. * Update src/tools/core/stat_analysis/aggr_stat_line.cc * Update src/tools/core/stat_analysis/parse_stat_line.cc * Update src/tools/core/stat_analysis/aggr_stat_line.cc * Recent changes to branch protection rules for the develop branch have broken the logic of the update_truth.yml GHA workflow. Instead of submitting a PR to merge develop into develop-ref directly, use an intermediate update_truth_for_develop branch. * Feature #2280 ens_prob (#2823) * Per #2280, update to support probability threshold strings like ==8, where 8 is the number of ensemble members, to create probability bins centered on the n/8 for n = 0 ... 8. * Per #2280, update docs about probability threshold settings. * Per #2280, use a loose tolerance when checking for consistent bin widths. * Per #2280, add a new unit test for grid_stat to demonstrate processing the output from gen_ens_prod. * Per #2280, when verifying NMEP probability forecasts, smooth the obs data first. * Per #2280, only request STAT output for the PCT line type to match unit_grid_stat.xml and minimize the new output files. * Per #2280, update config option docs. * Per #2280, update config option docs. --------- Co-authored-by: John Halley Gotway Co-authored-by: metplus-bot <97135045+metplus-bot@users.noreply.github.com> --- .github/workflows/testing.yml | 4 +- docs/Users_Guide/config_options.rst | 70 +++-- .../config/GridStatConfig_gen_ens_prod | 285 ++++++++++++++++++ internal/test_unit/xml/unit_grid_stat.xml | 20 +- src/basic/vx_util/thresh_array.cc | 180 +++++++---- src/basic/vx_util/thresh_array.h | 5 +- 6 files changed, 484 insertions(+), 80 deletions(-) create mode 100644 internal/test_unit/config/GridStatConfig_gen_ens_prod diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index b667511b92..1fccc357fa 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -136,7 +136,7 @@ jobs: - jobid: 'job1' tests: 'ascii2nc' - jobid: 'job2' - tests: 'pb2nc madis2nc pcp_combine' + tests: 'pb2nc madis2nc pcp_combine gen_ens_prod' fail-fast: false steps: - uses: actions/checkout@v4 @@ -176,7 +176,7 @@ jobs: - jobid: 'job1' tests: 'ascii2nc_indy pb2nc_indy tc_dland tc_pairs tc_stat plot_tc tc_rmw rmw_analysis tc_diag tc_gen' - jobid: 'job2' - tests: 'met_test_scripts mode_multivar mode_graphics mtd regrid airnow gsi_tools netcdf modis series_analysis gen_ens_prod wwmca_regrid gen_vx_mask grid_weight interp_shape grid_diag grib_tables lidar2nc shift_data_plane trmm2nc aeronet wwmca_plot ioda2nc gaussian' + tests: 'met_test_scripts mode_multivar mode_graphics mtd regrid airnow gsi_tools netcdf modis series_analysis wwmca_regrid gen_vx_mask grid_weight interp_shape grid_diag grib_tables lidar2nc shift_data_plane trmm2nc aeronet wwmca_plot ioda2nc gaussian' fail-fast: false steps: - uses: actions/checkout@v4 diff --git a/docs/Users_Guide/config_options.rst b/docs/Users_Guide/config_options.rst index 60f81e5ef1..ec76ecf7d5 100644 --- a/docs/Users_Guide/config_options.rst +++ b/docs/Users_Guide/config_options.rst @@ -101,13 +101,13 @@ The configuration file language supports the following data types: the user has already determined to be 2.5 outside of MET. * "==FBIAS" for a user-specified frequency bias value. - e.g. "==FBIAS1" to automatically de-bias the data, "==FBIAS0.9" to select a low-bias threshold, or "==FBIAS1.1" to select a high-bias threshold. - This option must be used in - conjunction with a simple threshold in the other field. For example, - when "obs.cat_thresh = >5.0" and "fcst.cat_thresh = ==FBIAS1;", - MET applies the >5.0 threshold to the observations and then chooses a - forecast threshold which results in a frequency bias of 1. - The frequency bias can be any float value > 0.0. + e.g. "==FBIAS1" to automatically de-bias the data, "==FBIAS0.9" to + select a low-bias threshold, or "==FBIAS1.1" to select a high-bias + threshold. This option must be used in conjunction with a simple + threshold in the other field. For example, when "obs.cat_thresh = >5.0" + and "fcst.cat_thresh = ==FBIAS1;", MET applies the >5.0 threshold to + the observations and then chooses a forecast threshold which results in + a frequency bias of 1. The frequency bias can be any float value > 0.0. * "CDP" for climatological distribution percentile thresholds. These thresholds require that the climatological mean and standard @@ -842,32 +842,37 @@ to be verified. This dictionary may include the following entries: When set as a boolean to TRUE, it indicates that the "fcst.field" data should be treated as probabilities. For example, when verifying the - probabilistic NetCDF output of Ensemble-Stat, one could configure the - Grid-Stat or Point-Stat tools as follows: + probabilistic NetCDF output of Gen-Ens-Prod for an ensemble of size 10, + one could configure the Grid-Stat or Point-Stat tools as follows: .. code-block:: none fcst = { - field = [ { name = "APCP_24_A24_ENS_FREQ_gt0.0"; - level = "(*,*)"; - prob = TRUE; } ]; + field = [ { name = "APCP_24_A24_ENS_FREQ_gt0.0"; + level = "(*,*)"; + cat_thresh = ==10; + prob = TRUE; } ]; } Setting "prob = TRUE" indicates that the "APCP_24_A24_ENS_FREQ_gt0.0" - data should be processed as probabilities. + data should be processed as probabilities. Setting "cat_thresh = ==10" + indicates that these probabilities are derived from an ensemble with 10 + members and 11 probability bins should be defined, each centered on the + value n/10 for n = 0, 1, ... 10. When set as a dictionary, it defines the probabilistic field to be used. For example, when verifying GRIB files containing probabilistic - data, one could configure the Grid-Stat or Point-Stat tools as - follows: + data, one could configure the Grid-Stat or Point-Stat tools as follows: .. code-block:: none fcst = { - field = [ { name = "PROB"; level = "A24"; - prob = { name = "APCP"; thresh_lo = 2.54; } }, - { name = "PROB"; level = "P850"; - prob = { name = "TMP"; thresh_hi = 273; } } ]; + field = [ { name = "PROB"; level = "A24"; + prob = { name = "APCP"; thresh_lo = 2.54; } + cat_thresh = ==0.25; }, + { name = "PROB"; level = "P850"; + prob = { name = "TMP"; thresh_hi = 273; } + cat_thresh = ==0.1; } ]; } The example above selects two probabilistic fields. In both, "name" @@ -883,6 +888,31 @@ to be verified. This dictionary may include the following entries: with a range [0, 100], it will automatically rescale it to be [0, 1] before applying the probabilistic verification methods. + Probabilistic statistics in MET are derived from an Nx2 probabilistic + contingency table. The N-dimension is determined by the number of + probability bins requested. The "cat_thresh" configuration option + defines the number of and size of these probabibility bins. The bins + must include the full range of possible probability values, [0, 1]. + Since selecting bins of equal width is common, shorthand notation is + provided to do so. The following options are supported. + + * :code:`cat_thresh = [ ==0.25 ];` specifies an equal probability bin + width of 0.25 and defines 4 bins between the values 0, 0.25, 0.5, 0.75, + and 1.0. The :code:`==p` threshold may be set to any probability bin + width greater than 0 and less than 1. + + * :code:`cat_thresh = [ ==10 ];` specifies probability bins for an + ensemble of size 10 and defines 11 bins between the values -0.05, 0.05, + 0.15, ..., 0.95, and 1.05. Note that each bin is centered on the + probability value n/10, for n = 0 to 10. The :code:`==n` threshold may + be set to any integer number of ensemble members greater than 1 to + define n+1 probability bins. + + * :code:`cat_thresh = [ >=0, >=0.5, >=0.75, >=1.0 ];` explicitly + specifies the probability thresholds and defines 3 bins of unequal + width between the values 0, 0.5, 0.75, and 1.0. By convention, the + greater-than-or-equal-to (">=" or "ge") inequality type is required. + * Set "prob_as_scalar = TRUE" to override the processing of probability data. When the "prob" entry is set as a dictionary to define the field of interest, setting "prob_as_scalar = TRUE" indicates that this @@ -2047,7 +2077,7 @@ This dictionary may include the following entries: .. code-block:: none hira = { - flag = FALSE; + flag = FALSE; width = [ 2, 3, 4, 5 ]; vld_thresh = 1.0; cov_thresh = [ ==0.25 ]; diff --git a/internal/test_unit/config/GridStatConfig_gen_ens_prod b/internal/test_unit/config/GridStatConfig_gen_ens_prod new file mode 100644 index 0000000000..8893c2d5ff --- /dev/null +++ b/internal/test_unit/config/GridStatConfig_gen_ens_prod @@ -0,0 +1,285 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Grid-Stat configuration file. +// +// For additional information, please see the MET User's Guide. +// +//////////////////////////////////////////////////////////////////////////////// + +// +// Output model name to be written +// +model = "FCST"; + +// +// Output description to be written +// May be set separately in each "obs.field" entry +// +desc = "NA"; + +// +// Output observation type to be written +// +obtype = "ANALYS"; + +//////////////////////////////////////////////////////////////////////////////// + +// +// Verification grid +// May be set separately in each "field" entry +// +regrid = { + to_grid = NONE; + method = NEAREST; + width = 1; + vld_thresh = 0.5; + shape = SQUARE; +} + +//////////////////////////////////////////////////////////////////////////////// + +// +// May be set separately in each "field" entry +// +censor_thresh = []; +censor_val = []; +mpr_column = []; +mpr_thresh = []; +cat_thresh = []; +cnt_thresh = [ NA ]; +cnt_logic = UNION; +wind_thresh = [ NA ]; +wind_logic = UNION; +eclv_points = 0.05; +nc_pairs_var_name = ""; +nc_pairs_var_suffix = ""; +hss_ec_value = NA; +rank_corr_flag = FALSE; + +// +// Forecast and observation fields to be verified +// +fcst = { + + level = "(*,*)"; + cat_thresh = ==7; + prob = TRUE; + + field = [ + { name = "UGRD_Z10_ENS_FREQ_ltCDP25"; }, + { name = "UGRD_Z10_ENS_NEP_ltCDP25_NBRHD25"; }, + { name = "UGRD_Z10_ENS_NMEP_ltCDP25_NBRHD25_GAUSSIAN1"; } + ]; + +} +obs = { + + name = "UGRD"; + level = "Z10"; + cat_thresh = =0.5 ]; +} + +//////////////////////////////////////////////////////////////////////////////// + +// +// Fourier decomposition +// May be set separately in each "obs.field" entry +// +fourier = { + wave_1d_beg = []; + wave_1d_end = []; +} + +//////////////////////////////////////////////////////////////////////////////// + +// +// Gradient statistics +// May be set separately in each "obs.field" entry +// +gradient = { + dx = [ 1 ]; + dy = [ 1 ]; +} + +//////////////////////////////////////////////////////////////////////////////// + +// +// Distance Map statistics +// May be set separately in each "obs.field" entry +// +distance_map = { + baddeley_p = 2; + baddeley_max_dist = NA; + fom_alpha = 0.1; + zhu_weight = 0.5; + beta_value(n) = n * n / 2.0; +} + +//////////////////////////////////////////////////////////////////////////////// + +// +// Threshold for SEEPS p1 (Probability of being dry) +// +seeps_p1_thresh = >=0.1&&<=0.85; + +//////////////////////////////////////////////////////////////////////////////// + +// +// Statistical output types +// May be set separately in each "obs.field" entry +// +output_flag = { + fho = NONE; + ctc = NONE; + cts = NONE; + mctc = NONE; + mcts = NONE; + cnt = NONE; + sl1l2 = NONE; + sal1l2 = NONE; + vl1l2 = NONE; + val1l2 = NONE; + vcnt = NONE; + pct = STAT; + pstd = NONE; + pjc = NONE; + prc = NONE; + eclv = NONE; + nbrctc = NONE; + nbrcts = NONE; + nbrcnt = NONE; + grad = NONE; + dmap = NONE; + seeps = NONE; +} + +// +// NetCDF matched pairs output file +// May be set separately in each "obs.field" entry +// +nc_pairs_flag = FALSE; + +//////////////////////////////////////////////////////////////////////////////// + +ugrid_dataset = ""; +ugrid_max_distance_km = 30; +ugrid_coordinates_file = ""; + +//////////////////////////////////////////////////////////////////////////////// + +grid_weight_flag = NONE; +tmp_dir = "/tmp"; +output_prefix = "${OUTPUT_PREFIX}"; +version = "V12.0.0"; + +//////////////////////////////////////////////////////////////////////////////// diff --git a/internal/test_unit/xml/unit_grid_stat.xml b/internal/test_unit/xml/unit_grid_stat.xml index f70f53ad22..cc24ad21d3 100644 --- a/internal/test_unit/xml/unit_grid_stat.xml +++ b/internal/test_unit/xml/unit_grid_stat.xml @@ -11,7 +11,7 @@ ]> - + @@ -328,4 +328,22 @@ + + &MET_BIN;/grid_stat + + OUTPUT_PREFIX GEN_ENS_PROD + CLIMO_MEAN_FILE &DATA_DIR_CLIMO;/NCEP_1.0deg/cmean_1d.19790410 + CLIMO_STDEV_FILE &DATA_DIR_CLIMO;/NCEP_1.0deg/cstdv_1d.19790410 + + \ + &OUTPUT_DIR;/gen_ens_prod/gen_ens_prod_NO_CTRL_20120410_120000V.nc \ + &DATA_DIR_MODEL;/grib1/arw-tom-gep0/arw-tom-gep0_2012040912_F024.grib \ + &CONFIG_DIR;/GridStatConfig_gen_ens_prod \ + -outdir &OUTPUT_DIR;/grid_stat -v 1 + + + &OUTPUT_DIR;/grid_stat/grid_stat_GEN_ENS_PROD_240000L_20120410_120000V.stat + + + diff --git a/src/basic/vx_util/thresh_array.cc b/src/basic/vx_util/thresh_array.cc index 8d1fda8312..eae780063f 100644 --- a/src/basic/vx_util/thresh_array.cc +++ b/src/basic/vx_util/thresh_array.cc @@ -527,6 +527,33 @@ void ThreshArray::get_simple_nodes(vector &v) { } +//////////////////////////////////////////////////////////////////////// + +bool ThreshArray::equal_bin_width(double &width) const { + + // Check number of elements + if(Nelements < 2) { + width = bad_data_double; + return(false); + } + + // Initialize return values + width = t[1].get_value() - t[0].get_value(); + bool is_equal = true; + + // Check for consistent widths, ignoring the last bin + for(int i=0; i<(Nelements-2); i++) { + double cur_width = t[i+1].get_value() - t[i].get_value(); + if(!is_eq(width, cur_width, loose_tol)) { + width = bad_data_double; + is_equal = false; + break; + } + } // end for i + + return(is_equal); +} + //////////////////////////////////////////////////////////////////////// // // External utility for parsing probability thresholds. @@ -535,8 +562,6 @@ void ThreshArray::get_simple_nodes(vector &v) { ThreshArray string_to_prob_thresh(const char *s) { ThreshArray ta; - double v; - int i; // Parse the input string as a comma-separated list ta.add_css(s); @@ -545,30 +570,29 @@ ThreshArray string_to_prob_thresh(const char *s) { if(ta.n() == 1 && ta[0].get_type() == thresh_eq) { // Store the threshold value - v = ta[0].get_value(); + double v = ta[0].get_value(); // Threshold value must be between 0 and 1 - if(v <= 0 || v >=1) { + // or be an integer greater than 1 + if(v <= 0 || (v >=1 && !is_eq(nint(v), v))) { mlog << Error << "\nstring_to_prob_thresh() -> " - << "threshold value (" << v - << ") must be between 0 and 1.\n\n"; + << "the threshold string (" << s + << ") must specify a probability bin width between 0 and 1 " + << "or an integer number of ensemble members.\n\n"; exit(1); } - // Determine input precision - ConcatString cs; - const char *ptr = strchr(s, '.'); - int prec = ( ptr ? m_strlen(++ptr) : 0 ); - cs.set_precision(prec); - - // Construct list of probability thresholds using the input precision - ta.clear(); - for(i=0; i*v<1.0; i++) { - cs << cs_erase << ">=" << i*v; - ta.add(cs.c_str()); + // Define probability bins from [0, 1] with equal width + if(v > 0 && v < 1) { + const char *ptr = strchr(s, '.'); + double prec = (ptr ? m_strlen(++ptr) : 0); + ta = define_prob_bins(0.0, 1.0, v, prec); + } + // Define ensemble probability bins + else { + double inc = 1.0/nint(v); + ta = define_prob_bins(-inc/2.0, 1.0+inc/2.0, inc, bad_data_int); } - cs << cs_erase << ">=" << 1.0; - ta.add(cs.c_str()); } // Check probability thresholds @@ -577,6 +601,39 @@ ThreshArray string_to_prob_thresh(const char *s) { return(ta); } +//////////////////////////////////////////////////////////////////////// + +ThreshArray define_prob_bins(double beg, double end, double inc, int prec) { + ThreshArray ta; + double v; + + // Check inputs + if(beg > 0 || end < 1 || inc <= 0 || + (!is_bad_data(prec) && prec < 0)) { + mlog << Error << "\nget_prob_bins() -> " + << "the probability thresholds must begin (" + << beg << ") <= 0 and end (" + << end << ") >= 1 with an increment (" + << inc << ") between 0 and 1 and precision (" + << prec << ") >= 0.\n\n"; + exit(1); + } + + // Set the specified precision + ConcatString cs; + if(!is_bad_data(prec)) cs.set_precision(prec); + + // Construct a list of probability thresholds + v = beg; + while(v <= end) { + cs << cs_erase << ">=" << v; + ta.add(cs.c_str()); + v += inc; + } + + return(ta); +} + //////////////////////////////////////////////////////////////////////// // // Convert array of probability thresholds to a string. @@ -584,47 +641,51 @@ ThreshArray string_to_prob_thresh(const char *s) { //////////////////////////////////////////////////////////////////////// ConcatString prob_thresh_to_string(const ThreshArray &ta) { - ConcatString s; + ConcatString cs; ThreshArray prob_ta; - bool status = true; + double w; // Check for probability thresholds - if(!check_prob_thresh(ta, false)) status = false; + if(check_prob_thresh(ta, false)) { - // Use the second threshold to construct a probability threshold array - // and check for equality - if(status) { - s << "==" << ta[1].get_value(); - prob_ta = string_to_prob_thresh(s.c_str()); - status = (ta == prob_ta); + // Check for equal bin widths + if(ta.equal_bin_width(w)) { + if(is_eq(ta[0].get_value(), 0.0) && + is_eq(ta[(ta.n() - 1)].get_value(), 1.0)) { + cs << "==" << w; + } + else { + cs << "==" << nint(1/w); + } + prob_ta = string_to_prob_thresh(cs.c_str()); + if(!(ta == prob_ta)) cs.clear(); + } } - // If not an array of probabilities, return comma-separated string - if(!status) s = ta.get_str(); + // Return comma-separated list of thresholds + if(cs.length() == 0) cs = ta.get_str(); - return(s); + return(cs); } //////////////////////////////////////////////////////////////////////// bool check_prob_thresh(const ThreshArray &ta, bool error_out) { - int i, n; - n = ta.n(); + const char * method_name = "check_prob_thresh() -> "; - // Check for at least 3 thresholds beginning with 0 and ending with 1. - if(n < 3 || - !is_eq(ta[0].get_value(), 0.0) || - !is_eq(ta[n-1].get_value(), 1.0)) { + int n = ta.n(); + // Check for at least 3 thresholds that include the range [0, 1] + if(n < 3 || ta[0].get_value() > 0 || ta[n-1].get_value() < 1) { if(error_out) { - mlog << Error << "\ncheck_prob_thresh() -> " - << "When verifying a probability field, you must " - << "select at least 3 thresholds beginning with 0.0 " - << "and ending with 1.0 (current setting: " - << ta.get_str() << ").\n" - << "Consider using the ==p shorthand notation for bins " - << "of equal width.\n\n"; + mlog << Error << "\n" << method_name + << "when verifying a probability field, you must " + << "select at least 3 thresholds which include the range [0, 1] " + << "(current setting: " << ta.get_str() << ").\n" + << "Consider using the \"==n\" shorthand notation to specify " + << "probability bins of equal width, for n < 1, or the integer " + << "number of ensemble members, for n > 1.\n\n"; exit(1); } else { @@ -632,18 +693,20 @@ bool check_prob_thresh(const ThreshArray &ta, bool error_out) { } } - for(i=0; i " - << "When verifying a probability field, all " + mlog << Error << "\n" << method_name + << "when verifying a probability field, all " << "thresholds must be greater than or equal to, " << "using \"ge\" or \">=\" (current setting: " << ta.get_str() << ").\n" - << "Consider using the ==p shorthand notation for bins " - << "of equal width.\n\n"; + << "Consider using the \"==n\" shorthand notation to specify " + << "probability bins of equal width, for n < 1, or the integer " + << "number of ensemble members, for n > 1.\n\n"; exit(1); } else { @@ -651,15 +714,20 @@ bool check_prob_thresh(const ThreshArray &ta, bool error_out) { } } - // Check that all thresholds are in [0, 1]. - if(ta[i].get_value() < 0.0 || ta[i].get_value() > 1.0) { + // Break out of the last loop + if(i+1 == n) break; + + // Check that all probability bins overlap with [0, 1] + if((ta[i].get_value() < 0 && ta[i+1].get_value() < 0) || + (ta[i].get_value() > 1 && ta[i+1].get_value() > 1)) { if(error_out) { - mlog << Error << "\ncheck_prob_thresh() -> " - << "When verifying a probability field, all thresholds " - << "must be between 0 and 1 (current setting: " + mlog << Error << "\n" << method_name + << "when verifying a probability field, each probability bin " + << "must overlap the range [0, 1] (current setting: " << ta.get_str() << ").\n" - << "Consider using the ==p shorthand notation for bins " - << "of equal width.\n\n"; + << "Consider using the \"==n\" shorthand notation to specify " + << "probability bins of equal width, for n < 1, or the integer " + << "number of ensemble members, for n > 1.\n\n"; exit(1); } else { diff --git a/src/basic/vx_util/thresh_array.h b/src/basic/vx_util/thresh_array.h index f4fe0c4336..4ad5cf5146 100644 --- a/src/basic/vx_util/thresh_array.h +++ b/src/basic/vx_util/thresh_array.h @@ -87,6 +87,8 @@ class ThreshArray { bool check_dbl(double) const; bool check_dbl(double, double, double) const; + + bool equal_bin_width(double &) const; }; //////////////////////////////////////////////////////////////////////// @@ -99,7 +101,8 @@ inline SingleThresh * ThreshArray::buf() const { return ( t ); //////////////////////////////////////////////////////////////////////// extern ThreshArray string_to_prob_thresh (const char *); -extern ConcatString prob_thresh_to_string (const ThreshArray &); +extern ConcatString prob_thresh_to_string (const ThreshArray &); +extern ThreshArray define_prob_bins (double, double, double, int); extern bool check_prob_thresh (const ThreshArray &, bool error_out = true); extern ThreshArray process_perc_thresh_bins (const ThreshArray &); extern ThreshArray process_rps_cdp_thresh (const ThreshArray &);