From 4e80f606fb8839d643f7d047b0c361e331b676c5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 30 Aug 2024 12:34:40 -0600 Subject: [PATCH] Update develop-ref after MET#2951 (#2957) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * #2673 Moved variable declaration after include * #2673 Moved namespace down below include * Feature #2395 wdir (#2820) * Per #2395, add new columns to VL1L2, VAL1L2, and VCNT line types for wind direction statistics. Work still in progress. * Per #2395, write the new VCNT columns to the output and document the additions to the VL1L2, VAL1L2, and VCNT columns. * Per #2395, add the definition of new statistics to Appendix G. * Per #2395, update file version history. * Per #2395, tweak warning message about zero wind vectors and update grid-stat and point-stat to log calls to the do_vl1l2() function. * Per #2395, refine the weights for wind direction stats, ignoring the undefined directions. * Update src/tools/core/stat_analysis/aggr_stat_line.cc * Update src/tools/core/stat_analysis/parse_stat_line.cc * Update src/tools/core/stat_analysis/aggr_stat_line.cc * Recent changes to branch protection rules for the develop branch have broken the logic of the update_truth.yml GHA workflow. Instead of submitting a PR to merge develop into develop-ref directly, use an intermediate update_truth_for_develop branch. * Feature #2280 ens_prob (#2823) * Per #2280, update to support probability threshold strings like ==8, where 8 is the number of ensemble members, to create probability bins centered on n/8 for n = 0 ... 8 (see the sketch below). * Per #2280, update docs about probability threshold settings. * Per #2280, use a loose tolerance when checking for consistent bin widths. * Per #2280, add a new unit test for grid_stat to demonstrate processing the output from gen_ens_prod. * Per #2280, when verifying NMEP probability forecasts, smooth the obs data first. * Per #2280, only request STAT output for the PCT line type to match unit_grid_stat.xml and minimize the new output files. * Per #2280, update config option docs. * Per #2280, update config option docs. * #2673 Change 0 to nullptr * #2673 Change 0 to nullptr * #2673 Change 0 to nullptr * #2673 Change 0 to nullptr * #2673 Change 0 to nullptr * #2673 Removed the redundant parentheses from return statements * #2673 Removed the redundant parentheses from return statements * #2673 Removed the redundant parentheses from return statements * #2673 Removed the redundant parentheses from return statements * #2673 Removed the redundant parentheses from return statements * #2673 restored return statement * #2673 Added std namespace * #2673 Moved down 'using namespace' statement. Removed trailing spaces * #2673 Moved down 'using namespace' statement. * #2673 Moved down 'using namespace' statement. * #2673 Moved down 'using namespace' statement. * #2673 Moved down 'using namespace' statement. * #2673 Added std namespace * #2673 Added std namespace * #2673 Added std namespace * #2673 Changed literal 1 to boolean value, true * Feature #2673 enum_to_string (#2835) * Feature #2583 ecnt (#2825) * Unrelated to #2583, fix typo in code comments. * Per #2583, add hooks to write 3 new ECNT columns for observation error data. * Per #2583, make error messages about mis-matched array lengths more informative. * Per #2583, switch to more concise variable naming conventions of ign_oerr_cnv, ign_oerr_cor, and dawid_seb. * Per #2583, fix typo to enable compilation * Per #2583, define the 5 new ECNT column names.
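The ==8 probability threshold described in the #2280 bullets above creates one bin centered on each n/8. A minimal sketch of that binning, assuming only what the commit text states (the function name and layout are illustrative, not MET's actual code)::

    #include <cstdio>
    #include <vector>

    // Bin edges for an "==N" ensemble probability threshold: one bin
    // centered on n/N for n = 0 ... N. Interior edges fall halfway
    // between consecutive centers; the first and last bins are
    // half-width, pinned at 0 and 1.
    std::vector<double> ens_prob_bin_edges(int n_members) {
       std::vector<double> edges;
       edges.push_back(0.0);
       for (int n = 1; n <= n_members; n++) {
          edges.push_back((2.0 * n - 1.0) / (2.0 * n_members));
       }
       edges.push_back(1.0);
       return edges;
    }

    int main() {
       // For ==8: 0, 1/16, 3/16, ..., 15/16, 1.
       for (double e : ens_prob_bin_edges(8)) printf("%.5f\n", e);
       return 0;
    }

Note the edge bins are half the width of the interior bins, which is presumably why the width-consistency check mentioned above needs a loose tolerance.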
* Per #2583, add 5 new columns to the ECNT table in the Ensemble-Stat chapter * Per #2583, update stat_columns.cc to write these 5 new ECNT columns * Per #2583, update ECNTInfo class to compute the 5 new ECNT statistics. * Per #2583, update stat-analysis to parse the 5 new ECNT columns. * Per #2583, update aggregate_stat logic for 5 new ECNT columns. * Per #2583, update PairDataEnsemble logic for 5 new ECNT columns * Per #2583, update vx_statistics library with obs_error handling logic for the 5 new ECNT columns * Per #2583, changes to make it compile * Per #2583, changes to make it compile * Per #2583, switch to a consistent ECNT column naming convention with OERR at the end. Using IGN_CONV_OERR and IGN_CORR_OERR. * Per #2583, define ObsErrorEntry::variance() with a call to the dist_var() utility function. * Per #2583, update PairDataEnsemble::compute_pair_vals() to compute the 5 new stats with the correct inputs. * Per #2583, add DEBUG(10) log messages about computing these new stats. * Per #2583, update Stat-Analysis to compute these 5 new stats from the ORANK line type. * Per #2583, whitespace and comments. * Per #2583, update the User's Guide. * Per #2583, remove the DS_ADD_OERR and DS_MULT_OERR ECNT columns and rename DS_OERR as DSS, since observation error is not actually involved in its computation. * Per #2583, minor update to Appendix C * Per #2583, rename ECNT line type statistic DSS to IDSS. * Per #2583, fix a couple of typos * Per #2583, more error checking. * Per #2583, remove the ECNT IDSS column since it's just 2*pi*IGN, the existing ignorance score, and only provides meaningful information when combined with the other Dawid-Sebastiani statistics that have already been removed. * Per #2583, add Eric's documentation of these new stats to Appendix C. Along the way, update the DOI links in the references based on this APA style guide: https://apastyle.apa.org/style-grammar-guidelines/references/dois-urls#:~:text=Include%20a%20DOI%20for%20all,URL%2C%20include%20only%20the%20DOI. * Per #2583, fix new equations with embedded underscores for PDF by defining both html and pdf formatting options. * Per #2583, update the ign_conv_oerr equation to include a 2*pi multiplier for consistency with the existing ignorance score. Also, fix the documented equations. * Per #2583, remove log file that was inadvertently added on this branch. * Per #2583, simplify ObsErrorEntry::variance() implementation. For the distribution type of NONE, return a variance of 0.0 rather than bad data, as discussed with @michelleharrold and @JeffBeck-NOAA on 3/8/2024. --------- Co-authored-by: MET Tools Test Account * Revert #2825 since more documentation and testing is needed (#2837) This reverts commit 108a8958b206d6712197823a083666ab039bf818. * Feature #2583 ecnt fix IGN_OERR_CORR (#2838) * Unrelated to #2583, fix typo in code comments. * Per #2583, add hooks to write 3 new ECNT columns for observation error data. * Per #2583, make error messages about mis-matched array lengths more informative. * Per #2583, switch to more concise variable naming conventions of ign_oerr_cnv, ign_oerr_cor, and dawid_seb. * Per #2583, fix typo to enable compilation * Per #2583, define the 5 new ECNT column names. * Per #2583, add 5 new columns to the ECNT table in the Ensemble-Stat chapter * Per #2583, update stat_columns.cc to write these 5 new ECNT columns * Per #2583, update ECNTInfo class to compute the 5 new ECNT statistics. * Per #2583, update stat-analysis to parse the 5 new ECNT columns.
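For reference, the ign_conv_oerr equation mentioned above generalizes the Gaussian ignorance score by convolving the ensemble variance with the observation error variance. A hedged sketch in LaTeX (notation assumed; the authoritative equations live in Appendix C of the User's Guide)::

    \mathrm{IGN} = \tfrac{1}{2}\ln\!\left(2\pi\sigma^{2}\right)
                 + \frac{(o - \bar{f})^{2}}{2\sigma^{2}}
    \qquad
    \mathrm{IGN\_CONV\_OERR}
        = \tfrac{1}{2}\ln\!\left(2\pi(\sigma_{ens}^{2}+\sigma_{oerr}^{2})\right)
        + \frac{(o - \bar{f})^{2}}{2(\sigma_{ens}^{2}+\sigma_{oerr}^{2})}

Here \bar{f} and \sigma_{ens}^{2} are the ensemble mean and variance, \sigma_{oerr}^{2} is the observation error variance, and o is the observed value; the 2\pi factor is the multiplier the commit adds for consistency with the existing ignorance score.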
* Per #2583, update aggregate_stat logic for 5 new ECNT columns. * Per #2583, update PairDataEnsemble logic for 5 new ECNT columns * Per #2583, update vx_statistics library with obs_error handling logic for the 5 new ECNT columns * Per #2583, changes to make it compile * Per #2583, changes to make it compile * Per #2583, switch to a consistent ECNT column naming convention with OERR at the end. Using IGN_CONV_OERR and IGN_CORR_OERR. * Per #2583, define ObsErrorEntry::variance() with a call to the dist_var() utility function. * Per #2583, update PairDataEnsemble::compute_pair_vals() to compute the 5 new stats with the correct inputs. * Per #2583, add DEBUG(10) log messages about computing these new stats. * Per #2583, update Stat-Analysis to compute these 5 new stats from the ORANK line type. * Per #2583, whitespace and comments. * Per #2583, update the User's Guide. * Per #2583, remove the DS_ADD_OERR and DS_MULT_OERR ECNT columns and rename DS_OERR as DSS, since observation error is not actually involved in its computation. * Per #2583, minor update to Appendix C * Per #2583, rename ECNT line type statistic DSS to IDSS. * Per #2583, fix a couple of typos * Per #2583, more error checking. * Per #2583, remove the ECNT IDSS column since it's just 2*pi*IGN, the existing ignorance score, and only provides meaningful information when combined with the other Dawid-Sebastiani statistics that have already been removed. * Per #2583, add Eric's documentation of these new stats to Appendix C. Along the way, update the DOI links in the references based on this APA style guide: https://apastyle.apa.org/style-grammar-guidelines/references/dois-urls#:~:text=Include%20a%20DOI%20for%20all,URL%2C%20include%20only%20the%20DOI. * Per #2583, fix new equations with embedded underscores for PDF by defining both html and pdf formatting options. * Per #2583, update the ign_conv_oerr equation to include a 2*pi multiplier for consistency with the existing ignorance score. Also, fix the documented equations. * Per #2583, remove log file that was inadvertently added on this branch. * Per #2583, simplify ObsErrorEntry::variance() implementation. For the distribution type of NONE, return a variance of 0.0 rather than bad data, as discussed with @michelleharrold and @JeffBeck-NOAA on 3/8/2024. * Per #2583, updates to ensemble-stat.rst recommended by @michelleharrold and @JeffBeck-NOAA. * Per #2583, implement changes to the IGN_CORR_OERR corrected as directed by @ericgilleland. --------- Co-authored-by: MET Tools Test Account * Update the pull request template to include a question about expected impacts to existing METplus Use Cases. * #2830 Changed enum Builtin to enum class * #2830 Converted enum to enum class at config_constants.h * Feature #2830 bootstrap enum (#2843) * Bugfix #2833 develop azimuth (#2840) * Per #2833, fix n-1 bug when defining the azimuth delta for range/azimuth grids. * Per #2833, when defining TcrmwData:range_max_km, divide by n_range - 1 since the range values start at 0. * Per #2833, remove max_range_km from the TC-RMW config file. Set the default rmw_scale to NA so that it's not used by default. And update the documentation. Still actually need to make the logic of the code work as it should. * Per #2833, update tc_rmw to define the range as either a function of rmw or using explicit spacing in km. * Per #2833, update the TCRMW Config files to remove the max_range_km entry, and update the unit test for one call to use RMW ranges and the other to use ranges defined in kilometers.
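The #2833 azimuth/range fixes above are fencepost corrections: n points starting at range 0 span only n - 1 intervals. A minimal sketch of the corrected spacing (names are illustrative, not the actual TcrmwData members)::

    #include <cassert>

    // n_range ring values starting at 0 span n_range - 1 intervals,
    // so the spacing divides by n - 1 rather than n.
    double range_delta_km(double max_range_km, int n_range) {
       assert(n_range > 1);
       return max_range_km / (n_range - 1);  // previously: / n_range
    }

    int main() {
       // 5 rings covering 0..400 km are 100 km apart, not 80 km.
       assert(range_delta_km(400.0, 5) == 100.0);
       return 0;
    }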
* Per #2833, just correct code comments. * Per #2833, divide by n - 1 when computing the range delta, rather than n. * Per #2833, correct the handling of the maximum range in the tc-rmw tool. For fixed delta km, need to define the max range when setting up the grid at the beginning. --------- Co-authored-by: MET Tools Test Account * #2830 Changed enum PadSize to enum class * #2830 Removed redundant parentheses * #2830 Removed commented-out code * #2830 Use auto * #2830 Changed enum to enum class for DistType, InterpMthd, GridTemplates, and NormalizeType (see the sketch below) * #2830 Moved enum_class_as_integer from header file to cc files * #2830 Added enum_as_int.hpp * #2830 Added enum_as_int.hpp * Deleted enum_class_as_integer and renamed it to enum_class_as_int * Removed redundant parentheses * #2830 Changed enum to enum class * #2830 Changed enum_class_as_integer to enum_class_as_int * Feature #2379 sonarqube gha (#2847) * Per #2379, testing initial GHA SonarQube setup. * Per #2379, switch to only analyzing the src directory. * Per #2379, move more config logic from sonar-project.properties into the workflow. #ci-skip-all * Per #2379, try removing + symbols * Per #2379, move projectKey into xml workflow and remove sonar-project.properties. * Per #2379, try following the instructions at https://github.com/sonarsource-cfamily-examples/linux-autotools-gh-actions-sq/blob/main/.github/workflows/build.yml ci-skip-all * Per #2379, see details of progress described in this issue comment: https://github.com/dtcenter/MET/issues/2379#issuecomment-2000242425 * Unrelated to #2379, just removing spurious space that gets flagged as a diff when re-running enum_to_string on seneca. * Per #2379, try running SonarQube through GitHub. * Per #2379, remove empty env section and also disable the testing workflow temporarily during sonarqube development. * Per #2379, fix docker image name. * Per #2379, delete unneeded script. * Per #2379, update GHA to scan Python code and push to the correct SonarQube projects. * Per #2379, update GHA SonarQube project names * Per #2379, update the build job name * Per #2379, update the compile step name * Per #2379, switch to consistent SONAR variable names. * Per #2379, fix typo in sed expressions. * Per #2379, just rename the log artifact * Per #2379, use time_command wrapper instead of run_command. * Per #2379, fix bad env var name * Per #2379, switch from egrep to grep. * Per #2379, just try cat-ting the logfile * Per #2379, test whether cat-ting the log file actually works. * Per #2379, revert back * Per #2379, mention SonarQube in the PR template. Make workflow name more succinct. * Per #2379, add SONAR_REFERENCE_BRANCH setting to define the sonar.newCode.referenceBranch property. The goal is to define the comparison reference branch for each SonarQube scan. * Per #2379, have the sonarqube.yml job print the reference branch it's using * Per #2379, intentionally introduce a new code smell to see if SonarQube correctly flags it as appearing in new code. * Per #2379, try adding the SonarQube quality gate check. * Per #2379, add logic for using the report-task.txt output files to check the quality gate status for both the python and cxx scans. * Per #2379 must use unique GHA IDs * Per #2379, working on syntax for quality gate checks * Per #2379, try again.
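The #2830 bullets above and below migrate plain enums to scoped enums. A minimal sketch of the pattern, with a made-up type and an enum_class_as_int() helper whose signature is assumed from the commit text::

    #include <cstdio>
    #include <type_traits>

    // Scoped enum: enumerators no longer leak into the enclosing scope
    // and no longer convert implicitly to int.
    enum class PadSize { None, Small, Large };

    // Assumed shape of the enum_class_as_int() helper named above:
    // make the integer conversion explicit where it is still needed.
    template <typename E>
    constexpr auto enum_class_as_int(E e) {
       return static_cast<std::underlying_type_t<E>>(e);
    }

    int main() {
       PadSize p = PadSize::Small;            // must qualify with PadSize::
       // int i = p;                          // would no longer compile
       printf("%d\n", enum_class_as_int(p));  // prints 1
       return 0;
    }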
* Per #2379, try again * Per #2379, try again * Per #2379, try again * Per #2379, try again * Per #2379, try again * Per #2379, try yet again * Per #2379 * Per #2379, add more debug * Per #2379, remove -it option from docker run commands * Per #2379, again * Per #2379, now that the scan works as expected, remove the intentional SonarQube code smell as well as debug logging. * Hotfix related to #2379. The sonar.newCode.referenceBranch and sonar.branch.name cannot be set to the same string! Only add the newCode definition when they differ. * #2830 Changed enum STATJobType to enum class * #2830 Changed STATLineType to enum class * #2830 Changed Action to enum class * #2830 Changed ModeDataType to enum class * #2830 Changed StepCase to enum class * #2830 Changed enum to enum class * #2830 Changed GenesisPairCategory to enum class * #2830 Removed redundant parentheses * #2830 Reduced duplicated if checks * #2830 Cleanup * #2830 Use empty() instead of length checking * #2830 Adjusted indentations * Feature #2379 develop sonarqube updates (#2850) * Per #2379, move rgb2ctable.py into the python utility scripts directory for better organization and to enable convenient SonarQube scanning. * Per #2379, remove point.py from the vx_python3_utils directory which clearly was inadvertently added during development 4 years ago. As far as I can tell it isn't being called by any other code and doesn't belong in the repository. Note that scripts/python/met/point.py has the same name but is entirely different. * Per #2379, update the GHA SonarQube scan to do a single one with Python and C++ combined. The nightly build script is still doing 2 separate scans for now. If this all works well, they could also be combined into a single one. * Per #2379, eliminate MET_CONFIG_OPTIONS from the SonarQube workflow since it doesn't need to be and probably shouldn't be configurable. * Per #2379, trying to copy report-task.txt out of the image * Per #2379, update build_met_sonarqube.sh to check the scan return status * Per #2379, fix bash assignment syntax * Per #2379, remove unused SCRIPT_DIR envvar * Per #2379, switch to a single SonarQube scan for MET's nightly build as well * Feature 2654 ascii2nc polar buoy support (#2846) * Added iabp data type, and modified file_handler to filter based on time range, which was added as a command line option * handle time using input year, hour, min, and doy * cleanup and switch to position day of year for time computations * Added an ascii2nc unit test for iabp data * Added utility scripts to pull iabp data from the web and find files in a time range * Modified iabp_handler to always output a placeholder 'location' observation with value 1 * added description of IABP data python utility scripts * Fixed syntax error * Fixed another syntax error. * Slight reformat of documentation * Per #2654, update the Makefiles in scripts/python/utility to include all the python scripts that should be installed. * Per #2654, remove unused code from get_iabp_from_web.py that is getting flagged as a bug by SonarQube. * Per #2654, fix typo in docs --------- Co-authored-by: John Halley Gotway Co-authored-by: MET Tools Test Account * Feature #2786 rpss_from_prob (#2861) * Per #2786, small change to an error message unrelated to this development. * Per #2786, add RPSInfo::set_climo_prob() function to derive the RPS line type from climatology probability bins. And update Ensemble-Stat to call it. * Per #2786, minor change to clarify error log message.
* Per #2786, for is_prob = TRUE input, the RPS line type is the only output option. Still need to update docs! * Per #2786, add new call to Ensemble-Stat to test computing RPS from climo probabilities * Per #2786, use name rps_climo_bin_prob to be very explicit. * Per #2786, redefine logic of RPSInfo::set_climo_bin_prob() to match the CPC definition. Note that reliability, resolution, uncertainty, and RPSS based on the sample climatology are all set to bad data. Need to investigate whether they can be computed using these inputs. * Per #2786, remove the requirement that any fcst.prob_cat_thresh thresholds must be defined. If they are defined, pass them through to the FCST_THRESH output column. If not, write NA. Add check to make sure the event occurs in exactly 1 category. * Per #2786, don't enforce fcst.prob_cat_thresh == obs.prob_cat_thresh for probabilistic inputs. And add more is_prob checks so that only the RPS line type can be written when given probabilistic inputs. * updated documentation * Per #2786, call rescale_probability() function to convert from 0-100 probs to 0-1 probs. --------- Co-authored-by: j-opatz * Feature #2862 v12.0.0-beta4 (#2864) * Feature #2379 develop single_sq_project (#2865) * Hotfix to the documentation in the develop branch. Issue #2858 was closed as a duplicate of #2857. I had included it in the MET-12.0.0-beta4 release notes, but the work is not yet actually complete. * Feature 2842 ugrid config (#2852) * #2842 Removed UGrid related setting * #2842 Corrected vertical level for data_plane_array * #2842 Do not allow the time range * #2842 The UGridConfig file can be passed as ugrid_dataset * #2842 Changed -config option to -ugrid_config * #2842 Deleted UGrid configurations * #2842 Fix a compile error when UGrid is disabled * #2842 Cleanup * #2842 Added a unit test point_stat_ugrid_mpas_config * #2842 Added a PointStatConfig without UGrid dataset. * #2842 Corrected typo in the variable name * Switched from time_centered to time_instant. I think time_centered is the center of the forecast lead window and time_instant is the time the forecast is valid (end of forecast window). * #2842 Removed ugrid_max_distance_km and unused metadata names * #2842 Restored time variable time_instant for LFric * #2842 Adjust lon between -180 and 180 * #2842 Adjust lon between -180 and 180 * #2842 Adjust lon between -180 and 180 * #2842 Adjusted lon to between -180 and 180 * #2842 Changed variable names * Per #2842, switch from degrees east to west right when the longitudes are read.
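The repeated "adjust lon" bullets above normalize longitudes into [-180, 180] and then flip from degrees east to degrees west as the values are read. A minimal sketch of that arithmetic (standalone, not the UGrid reader's actual code)::

    #include <cmath>
    #include <cstdio>

    // Wrap any longitude in degrees east into [-180, 180).
    double wrap_lon_deg_east(double lon) {
       lon = std::fmod(lon + 180.0, 360.0);
       if (lon < 0.0) lon += 360.0;
       return lon - 180.0;
    }

    int main() {
       // 250 degrees east wraps to -110 east; negating gives +110 west,
       // the east-to-west switch described above.
       double east = wrap_lon_deg_east(250.0);  // -110.0
       printf("%.1f east = %.1f west\n", east, -east);
       return 0;
    }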
* #2842, switch from degrees east to west right when the longitudes are read * #2842 Cleanup debug messages --------- Co-authored-by: Howard Soh Co-authored-by: Daniel Adriaansen Co-authored-by: John Halley Gotway * Feature 2753 comp script config (#2868) * set dynamic library file extension to .dylib if running on MacOS and .so otherwise * Added disabling of jasper documentation for compiliation on Hera * Updated * remove extra export of compiler env vars * include full path to log file so it is easier to file the log file to examine when a command fails * send cmake output to a log file * remove redundant semi-colon * use full path to log file so it is easier to examine on failure * use run_cmd to catch if rm command fails * Modifications for compilation on hera, gaea, and orion * Updating * fixed variable name * clean up if/else statements * set TIFF_LIBRARY_RELEASE argument to use full path to dynamic library file to prevent failure installing proj library * set LDFLAGS so that LDFLAGS value set in the user's environment will also be used * Updated based on gaea, orion, and hera installs * Updated * change extension of dynamic library files only if architecture is arm64 because older Macs still use .so * added netcdf library to args to prevent error installing NetCDF-CXX when PROJ has been installed in the same run of the script -- PATH is set in the COMPILE_PROJ if block that causes this flag from being added automatically * clean up how rpath and -L are added to LDFLAGS so that each entry is separate -- prevents errors installing on Mac arm64 because multiple rpath values aren't read using :. Also use MET_PROJLIB * Updated * removed -ltiff from MET libs * only add path to rpath and -L arguments if they are not already included in LDFLAGS * changed from using LIB_TIFF (full path to tiff lib file) to use TIFF_LIB_DIR (dir containing tiff lib file). Added TIFF_INCLUDE_DIR to proj compilation and -DJAS_ENABLE_DOC to jasper compliation taken from @jprestop branch * update comments * ensure all MET_* and MET_*LIB variables are added to the rpath for consistency * remove unnecessary if block and only export LDFLAGS at the end of setting locally * Updated * Added section for adding /lib64 and rearranged placement of ADDTL_DIR * Commenting out the running of the Jasper lib tests * Updating and/or removing files * Updating and/or removing files * Latest udpates which include the addition of the tiff library for proj * Remove commented out line. Co-authored-by: John Halley Gotway * Make indentation consistent. Co-authored-by: John Halley Gotway * Make indentation consistent. Co-authored-by: John Halley Gotway * Make indentation consistent. 
Co-authored-by: John Halley Gotway * Per #2753, added -lm to configure_lib_args for NetCDF-CXX * Per #2753 updating acorn files * Per #2753, update wcoss2 files * Per #2753, updating acorn file to include MET_PYTHON_EXE * Per #2753, updated files for 12.0.0 for derecho * Per #2753, updated derecho file adding MET_PYTHON_EXE and made corrections * Updating config files * Updating orion files * Updates for gaea's files * Updating gaea modulefile * Removing modulefile for cheyenne * Added MET_PYTHON_EXE * Added MET_PYTHON_EXE to hera too * Adding file for hercules * Removing equals sign from setenv * Adding file for hercules * Updated script to add libjpeg installation for grib2c * Per #2753, Adding file for casper --------- Co-authored-by: George McCabe <23407799+georgemccabe@users.noreply.github.com> Co-authored-by: John Halley Gotway * Feature #2795 level_mismatch_warning (#2873) * Per #2795, move the warning message about level mismatch from the config validation step to when the forecast files are being processed. Only check this when the number of forecast fields is greater than 1, but no longer limit the check to pressure levels only. * Per #2795, add comments * Whitespace * Per #2795, port level mismatch fix over to Ensemble-Stat. Check it for each verification task, but only print it once for each task, rather than once for each task * ensemble member. * Feature #2870 removing_MISSING_warning (#2872) * Per #2870, define utility functions for parsing the file type from a file list and for logging missing files, checking for the MISSING keyword. Also, update Ensemble-Stat and Gen-Ens-Prod to call these functions. * Per #2870, update the gen_ens_prod tests to demonstrate the use of the MISSING keyword for missing files. METplus uses this keyword for Ensemble-Stat and Gen-Ens-Prod. * Feature 2842 ugrid config (#2875) * #2842 Removed UGrid related setting * #2842 Corrected vertical level for data_plane_array * #2842 Do not allow the time range * #2842 The UGridConfig file can be passed as ugrid_dataset * #2842 Changed -config option to -ugrid_config * #2842 Deleted UGrid configurations * #2842 Fix a compile error when UGrid is disabled * #2842 Cleanup * #2842 Added a unit test point_stat_ugrid_mpas_config * #2842 Added a PointStatConfig without UGrid dataset. * #2842 Corrected typo in the variable name * Switched from time_centered to time_instant. I think time_centered is the center of the forecast lead window and time_instant is the time the forecast is valid (end of forecast window). * #2842 Removed ugrid_max_distance_km and unused metadata names * #2842 Restored time variable time_instant for LFric * #2842 Adjust lon between -180 and 180 * #2842 Adjust lon between -180 and 180 * #2842 Adjust lon between -180 and 180 * #2842 Adjusted lon to between -180 and 180 * #2842 Changed variable names * Per #2842, switch from degrees east to west right when the longitudes are read. * #2842, switch from degrees east to west right when the longitudes are read * #2842 Cleanup debug messages * #2842 Disabled output types except STAT for sl1l2 * #2842 Disabled output types except STAT for sl1l2 and MPR * #2842 Reduced output files for UGrid --------- Co-authored-by: Howard Soh Co-authored-by: Daniel Adriaansen Co-authored-by: John Halley Gotway * Hotfix to develop branch to remove duplicate test named 'point_stat_ugrid_mpas_config'. That was causing unit_ugrid.xml to fail because it was still looking for .txt output files that are no longer being generated.
* Feature 2748 document ugrid (#2869) * Initial documentation of the UGRID capability. * Fixes error in references, adds appendix to index, and adds sub-section for configuration entries and a table for metadata map items. * Corrects LFRic, rewords section on UGRID conventions, updates description of using GridStat, and removes mention of nodes. * Forgot one more mention of UGRID conventions. * Incorporates more suggestions from @willmayfield. * Switches to numerical table reference. * Feature #2781 Convert MET NetCDF point obs to Pandas DataFrame (#2877) * Per #2781, added function to convert MET NetCDF point observation data to pandas so it can be read and modified in a python embedding script. Added example python embedding script * ignore python cache files * fixed function call * reduce cognitive complexity to satisfy SonarQube and add boolean return value to catch if function fails to read data * clean up script and add comments * replace call to object function that doesn't exist, handle exception when file passed to script cannot be read by the NetCDF library * rename example script * add new example script to makefiles * fix logic to build pandas DataFrame to properly get header information from observation header IDs * Per #2781, add unit test to demonstrate python embedding script that reads MET NetCDF point observation file and converts it to a pandas DataFrame * Per #2781, added init function for nc_point_obs to take an input filename. Also raise TypeError exception from nc_point_obs.read_data() if input file cannot be read * call parent class init function to properly initialize nc_point_obs * Feature #2883 pcp_combine_missing (#2886) * Per #2883, add -input_thresh command line option to configure allowable missing input files. * Per #2883, update pcp_combine usage statement. * Per #2883, update existing pcp_combine -derive unit test example by adding 3 new missing file inputs at the beginning, middle, and end of the file list. The first two are ignored since they include the MISSING keyword, but the third without that keyword triggers a warning message as desired. The -input_thresh option is added to only require 70% of the input files be present. This should produce the exact same output data. * Per #2883, update the pcp_combine logic for the sum command to allow missing data files based on the -input_thresh threshold. Add a test in unit_pcp_combine.xml to demonstrate. * Update docs/Users_Guide/reformat_grid.rst Co-authored-by: George McCabe <23407799+georgemccabe@users.noreply.github.com> * Per #2883, update pcp_combine usage statement in the code to be more similar to the User's Guide. * Per #2883, switch to using derive_file_list_missing as the one containing missing files and recreate derive_file_list as it had existed for the test named pcp_combine_derive_VLD_THRESH. * Per #2883, move initialization inside the same loop to resolve SonarQube issues. * Per #2883, update sum_data_files() to switch from allocating memory to using STL vectors to satisfy SonarQube. * Per #2883, changes to declarations of variables to satisfy SonarQube. * Per #2883, address more SonarQube issues * Per #2883, backing out an unintended change I made to tcrmw_grid.cc. This change belongs on a different branch. * Per #2883, update logic of parse_file_list_type() function to handle python input strings. Also update pcp_combine to parse the type of input files being read and log non-missing python input files expected.
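The -input_thresh option described in the #2883 bullets above accepts a run when the fraction of usable inputs meets a threshold, while MISSING-keyword entries are silently excused. A notional sketch of that gate (names hypothetical, not pcp_combine's actual code)::

    #include <string>
    #include <vector>

    // Count present inputs against -input_thresh (e.g. 0.70 for the
    // 70% example above). Entries prefixed with the MISSING keyword
    // are expected to be absent; other absences warrant a warning.
    bool enough_inputs(const std::vector<std::string> &files,
                       const std::vector<bool> &exists,
                       double input_thresh) {
       if (files.empty()) return false;
       int n_present = 0;
       for (size_t i = 0; i < files.size(); i++) {
          if (exists[i]) { n_present++; continue; }
          if (files[i].rfind("MISSING", 0) != 0) {
             // warning for an unexpected missing file (elided)
          }
       }
       return static_cast<double>(n_present) / files.size() >= input_thresh;
    }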
--------- Co-authored-by: George McCabe <23407799+georgemccabe@users.noreply.github.com> * Per #2888, update STATAnalysisJob::dump_stat_line() to support dumping stat line types VCNT, RPS, DMAP, and SSIDX. (#2891) * Per #2659, making updates as proposed at the 20240516 MET Eng. Mtg. (#2895) * Feature #2395 TOTAL_DIR (#2892) * Per #2395, remove the n_dir_undef and n_dira_undef variables that are superseded by the new dcount and dacount VL1L2Info members to keep track of the number of valid wind direction vectors. * Per #2395, add TOTAL_DIR columns to the VL1L2, VAL1L2, and VCNT line types and update the header column tables. * Per #2395, update the User's Guide to list the new TOTAL_DIR columns in the VL1L2, VAL1L2, and VCNT line types. * Per #2395, update stat_analysis to parse the new TOTAL_DIR columns and use the values to aggregate results when needed. * Per #2395, for SonarQube change 'const char *' to 'const char * const' to satisfy the finding that 'Global variables should be const.' Should probably switch from 'const char *' to strings eventually. But for now, I'm just making up for some SonarQube technical debt. * Per #2395, fix typo in placement of the DIR_ME column name in the met_header_columns_V12.0.txt file * Per #2395, add 2 new Stat-Analysis jobs to demonstrate the processing of VL1L2 lines. * Per #2395, update logic of is_vector_dir_stat(). Instead of just checking 'DIR_', check 'DIR_ME', 'DIR_MAE', and 'DIR_MSE' to avoid a false positive match for the 'DIR_ERR' column which is computed from the vector partial sums rather than the individual direction differences. * Bugfix #2897 develop python_valid_time (#2899) * Per #2897, fix typos in 2 log messages. Also fix the bug in storing the valid time strings. The time string in vld_array should exactly correspond to the numeric unixtime values in vld_num_array. Therefore they need to be updated inside the same if block. The bug is that we were storing only the unique unixtime values but storing ALL of the valid time strings, not just the unique ones. * Per #2897, minor change to formatting of log message * MET #2897, don’t waste time searching, just set the index to n - 1 * Per #2897, remove unused add_prec_point_obs(...) function * Per #2897, update add_point_obs(...) logic for DEBUG(9) to print very detailed log messages about what obs are being rejected and which are being used for each verification task. * Per #2897, refine the 'using' log message to make the wording consistent with the summary rejection reason counts log message * Per #2897, update the User's Guide about -v 9 for Point-Stat --------- Co-authored-by: j-opatz Co-authored-by: MET Tools Test Account * Bugfix 2867 point2grid qc flag (#2890) * #2867 Added compute_adp_qc_flag and adjusted ADP QC flags * #2867 Added point2grid_GOES_16_ADP_Enterprise_high. Changed AOD QC flags to 0,1,2 (was 1,2,3) * #2867 Added get_nc_att_values_ * #2867 Added get_nc_att_values. Added the argument allow_conversion to get_nc_data(netCDF::NcVar *, uchar *data) * #2867 Read the ADP QC flag values and meanings attributes from DQF variable and set the QC high, medium, low values to support Enterprise algorithm.
Adjusted the ADP QC values by using AOD qc values * #2867 Cleanup * #2867 Corrected indent * #2867 Changed log message * #2867 Removed unused argument * #2867 Removed unused argument * Cleanup * #2867 Fix SonarQube findings * #2867 Deleted protected section with no members * #2867 Cleanup * #2867 Fixed SonarQube findings: unused local variables, declare as const, etc. * #2867 Moved include directives to top * #2867 Changed some arguments to references to avoid copying objects * #2867 Do not filter by QC flag if -qc is not given * #2867 Use enum class for GOES QC: HIGH, MEDIUM, and LOW * #2867 Added log messages back which were deleted accidentally * #2867 Changed static const to constexpr * #2867 Initial release. Separated from nc_utils.h * #2867 Added nc_utils_core.h * #2867 Moved some blocks to nc_utils_core.h * #2867 Include nc_utils_core.h * #2867 Added const references * Per #2867, fixing typo in comments. --------- Co-authored-by: Howard Soh Co-authored-by: j-opatz * Hotfix to develop to fix the update_truth.yml workflow logic. This testing workflow run failed (https://github.com/dtcenter/MET/actions/runs/9209471209). Here we switch to a unique update truth branch name to avoid conflicts. * Avoid pushing directly to the develop or main_vX.Y branches since that is not necessary for the automation logic in MET. * #2904 Changed R path to R-4.4.0 (#2905) Co-authored-by: Howard Soh * Feature #2912 pb2nc error (#2914) * Feature 2717 convert unit.pl to unit.py (#2871) * created unit.py module in new internal/test_unit/python directory * added xml parsing to unit.py * added repl_env function * added reading of the remaining xml tags in build_tests function * progress on main function (putting together test commands) * a few more lines in the main function * minor updates * fixed how the test command was being run * added if name/main and command line parsing * fixed handling of no 'env' in cmd_only mode * handle params from xml that have \ after filename without space in between * added logging * added some more pieces to unit * more updates to unit.py, including running checks on output files * bug fixes, improved handling of output file names, improved handling of env vars, improved logging output * fixed how shell commands are run, and other minor fixes * added last bits from the perl script, fixed some bugs * created unit.py module in new internal/test_unit/python directory * added xml parsing to unit.py * added repl_env function * added reading of the remaining xml tags in build_tests function * progress on main function (putting together test commands) * a few more lines in the main function * minor updates * update scripts to call python unit test script instead of the old perl script * fix she-bang line to allow script to be run without python3 before it * add missing test_dir and exit_on_fail tags that are found in the rest of the unit test xml files * fix call to logger.warning * change tags named 'exists' to 'exist' to match the rest of the xml files * added logger to function * removed tab at end of line that was causing output file path to be excluded from the command * fix broken checks for output files * incorporated george's recommended changes * changed default to overwrite logs; allow for more than one xml file to be passed in command --------- Co-authored-by: Natalie babij Co-authored-by: Natalie babij Co-authored-by: Natalie babij Co-authored-by: Natalie Babij Co-authored-by: John Halley Gotway Co-authored-by: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Co-authored-by: j-opatz * Bugfix 2867 point2grid qc unittest (#2913) * #2867 Added compute_adp_qc_flag and adjusted ADP QC flags * #2867 Added point2grid_GOES_16_ADP_Enterprise_high. Changed AOD QC flags to 0,1,2 (was 1,2,3) * #2867 Added get_nc_att_values_ * #2867 Added get_nc_att_values. Added the argument allow_conversion to get_nc_data(netCDF::NcVar *, uchar *data) * #2867 Read the ADP QC flag values and meanings attributes from DQF variable and set the QC high, medium, low values to support Enterprise algorithm. Adjusted the ADP QC values by using AOD qc values * #2867 Cleanup * #2867 Corrected indent * #2867 Changed log message * #2867 Removed unused argument * #2867 Removed unused argument * Cleanup * #2867 Fix SonarQube findings * #2867 Deleted protected section with no members * #2867 Cleanup * #2867 Fixed SonarQube findings: unused local variables, declare as const, etc. * #2867 Moved include directives to top * #2867 Changed some arguments to references to avoid copying objects * #2867 Do not filter by QC flag if -qc is not given * #2867 Use enum class for GOES QC: HIGH, MEDIUM, and LOW * #2867 Added log messages back which were deleted accidentally * #2867 Changed static const to constexpr * #2867 Initial release. Separated from nc_utils.h * #2867 Added nc_utils_core.h * #2867 Moved some blocks to nc_utils_core.h * #2867 Include nc_utils_core.h * #2867 Added const references * #2867 Some 'static const' were changed to constexpr * #2867 Changed -qc options (1,2,3 to 0,1 - high & medium) for AOD * #2867 Merged develop branch * #2867 Corrected the unit test name --------- Co-authored-by: Howard Soh * Feature #2911 tc_stat_set_hdr (#2916) * Per #2911, no real changes for Stat-Analysis. Just changing order of variables for consistency. * Per #2911, add StatHdrColumns::apply_set_hdr_opts(...) function to be used by TC-Stat. * Per #2911, move ByColumn to the TCStatJob base class and add HdrName and HdrValue to support the -set_hdr job command. * Per #2911, update GSI tools to call the newly added StatHdrColumns::apply_set_hdr_opts(...) function. * Per #2911, update logic of Stat-Analysis for consistency to make use of common apply_set_hdr_opts() function. * Per #2911, add DataLine::set_item() function to support -set_hdr options. * Per #2911, just update contents of error message * Per #2911, add TCStatLine member functions for has() and get_offset(). * Per #2911, update tc_stat to support applying -set_hdr to TC-Stat filter jobs. * Per #2911, revise TC-Stat config files to exercise the -set_hdr job command option * Per #2911, update TC-Stat documentation to mention the -set_hdr job command option * Per #2911, add note * Per #2911, as recommended by SonarQube, make some of these member functions const. * Bugfix #2856 develop ens_climo (#2918) * Per #2856, port over fixes from main_v11.1 to develop. * Per #2856, correct conditionals in set_job_controls.sh and tweak existing Ensemble-Stat configuration file to exercise the logic that's being impacted here. * Bugfix #2841 develop tang_rad_winds (#2921) * Per #2841, port over fixes from bugfix_2841_main_v11.1_tang_rad_winds for the develop branch * Per #2841, clarify in the docs that azimuths are defined in degrees counter-clockwise from due east. * Per #2841, just updating with output from enum_to_string. * Per #2841, tweak the documentation. * Per #2841, correct the location of using namespace lines. * Per #2841, update compute_tc_diag.py to no longer skip writing the radial and tangential wind diagnostics.
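Bugfix #2841 above fixes the tangential/radial wind handling and documents the azimuth convention as degrees counter-clockwise from due east. Under that convention the standard (u, v) decomposition is sketched below; this is the generic formula, not necessarily MET's code::

    #include <cmath>

    // Decompose (u, v) winds about a storm center, with azimuth in
    // degrees counter-clockwise from due east per the #2841 docs.
    void radial_tangential(double u, double v, double azimuth_deg,
                           double &v_radial, double &v_tangential) {
       constexpr double deg_to_rad = 3.14159265358979323846 / 180.0;
       const double az = azimuth_deg * deg_to_rad;
       v_radial     =  u * std::cos(az) + v * std::sin(az);  // + outward
       v_tangential = -u * std::sin(az) + v * std::cos(az);  // + counter-clockwise
    }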
* Per #2841, update compute_tc_diag.py to no longer skip writing radial and tangential wind diagnostics. * Revert "Per #2841, update compute_tc_diag.py to no longer skip writing radial and tangential wind diagnostics." This reverts commit f097345bedcfcca663e8fb4322eed5b5e00e19fd. * Revert "Per #2841, update compute_tc_diag.py to no longer skip writing the radial and tangential wind diagnostics." This reverts commit c0402151b038c59efab99c060cc5c390edf002f6. * Per #2841, update comp_dir.sh logic to include .dat in the files that are diffed * Replace tab with spaces * Per #2841, correct the units for the azimuth netcdf output variable * Per #2841, reverse the x dimension of the rotated latlon grid to effectively switch from counterclockwise rotation to clockwise. --------- Co-authored-by: MET Tools Test Account * Feature #2601 seeps climo config (#2927) * #2601 Added seeps_grid_climo_name and seeps_point_climo_name * #2601 Added seeps_grid_climo_name * #2601 Removed SEEPS settings * #2601 Initial release * #2601 Changed to set the SEEPS climo by using the configuration * #2601 Removed SEEPS settings at PointStatConfig_APCP and use PointStatConfig_SEEPS for SEEPS testing * #2601 Updated description for seeps_grid_climo_name * #2601 Added an argument for the SEEPS climo file * #2601 Added conf_key_seeps_grid_climo_name and conf_key_seeps_point_climo_name * #2601 Support the climo filename from the configuration * #2601 Corrected key for climo name * Removing duplicate word --------- Co-authored-by: Howard Soh Co-authored-by: Julie Prestopnik * Feature 2673 sonarqube beta5 redundant parentheses (#2930) * #2673 Removed redundant parentheses * #2673 Removed redundant parentheses * #2673 Removed redundant parentheses * #2673 Removed redundant parentheses --------- Co-authored-by: Howard Soh * Fix release checksum action (#2929) * Feature 2857 tripolar coordinates (#2928) * #2857 Added MetNcCFDataFile::build_grid_from_lat_lon_vars * #2857 Added NcCfFile::build_grid_from_lat_lon_vars * #2857 Check the coordinates attribute to find latitude, longitude, and time variables * #2857 Get the lat/lon variables from the coordinates attribute if it exists * #2857 Added two constants * #2857 Deleted debug messages * #2857 Added lat_vname and lon_vname for var_name_map * #2857 Added two unit tests: point2grid_sea_ice_tripolar and point2grid_sea_ice_tripolar_config * #2857 Initial release * #2857 Correct dictionary to get file_type * #2857 Do not check the time variable for point2grid * #2857 Added point2grid_tripolar_rtofs --------- Co-authored-by: Howard Soh * Feature 2932 v12.0.0-beta5 (#2933) * Per #2932, updating version and release notes * Per #2932, updating date on release notes * Per #2932, fixed formatting and links * Update release-notes.rst * Update release-notes.rst Removing inline backticks since they do not format the way I expected, especially when put inside bolded release notes. --------- Co-authored-by: John Halley Gotway * Feature fix release notes (#2934) * Fixing up release notes * Update release-notes.rst --------- Co-authored-by: John Halley Gotway * Per dtcenter/METplus#2643 discussion, add more detail about the budget interpolation method. * Feature #2924 fcst climo, PR 1 of 2 (#2939) * Per #2924, Update the MPR and ORANK output line types to just write duplicate existing climo values, update the header tables and MPR/ORANK documentation tables. * Per #2924, update get_n_orank_columns() logic * Per #2924, update the Stat-Analysis parsing logic to parse the new MPR and ORANK climatology columns.
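The #2924 bullets above split the shared MPR/ORANK climatology columns into FCST_ and OBS_ variants, with backward compatibility for the old names noted later in this log. A hedged sketch of that lookup; the exact old-to-new mapping is an assumption to be verified against met_header_columns_V12.0.txt::

    #include <map>
    #include <string>

    // Assumed old-to-new MPR/ORANK climo column mapping: requests for
    // the old names resolve to the new OBS_-prefixed columns, and a
    // DEBUG(2) message notes the remapping (logging elided here).
    const std::map<std::string, std::string> old_to_new = {
       { "CLIMO_MEAN",  "OBS_CLIMO_MEAN"  },
       { "CLIMO_STDEV", "OBS_CLIMO_STDEV" },
       { "CLIMO_CDF",   "OBS_CLIMO_CDF"   }
    };

    std::string resolve_climo_column(const std::string &name) {
       auto it = old_to_new.find(name);
       return (it == old_to_new.end()) ? name : it->second;
    }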
* Per #2924, making some changes to the vx_statistics library to store climo data... but more work to come. Committing this first set of changes that are incomplete but do compile. * Per #2924, this big set of changes does compile but make test produces a segfault for ensemble-stat * Per #2924, fix return value for is_keeper_obs() * Per #2924, move fcst_info/obs_info into the VxPairBase pointer. * Per #2924, update Ensemble-Stat to set the VxPairBase::fcst_info pointer * Per #2924 update handling of fcst_info and obs_info pointers in Ensemble-Stat * Per #2924, update the GSI tools to handle the new fcst climo columns. * Per #2924, add backward compatibility logic so that when old climo column names are requested, the new ones are used. * Per #2924, print a DEBUG(2) log message if old column names are used. * Per #2924, switch the unit tests to reference the updated MPR column names rather than the old ones. * Per #2924, work in progress. Not fully compiling yet * Per #2924, another round of changes. Removing MPR:FCST_CLIMO_CDF output column. This compiles but not sure if it actually runs yet * Per #2924, work in progress * Per #2924, work in progress. Almost compiling again. * Per #2924, get it compiling * Per #2924, add back in support for SCP and CDP which are interpreted as SOCP and OCDP, resp. * Per #2924, update docs about SCP and CDP threshold types * Per #2924, minor whitespace changes * Per #2924, fix an uninitialized pointer bug by defining/calling SeepsClimoGrid::init_from_scratch() member function. The constructor had been calling clear() to delete pointers that weren't properly initialized to nullptr. Also, simplify some map processing logic. * Per #2924, rename SeepsAggScore from seeps to seeps_agg for clarity and to avoid conflicts in member function implementations. * Per #2924, fix seeps compilation error in Point-Stat * Per #2924, fix bug in the boolean logic for handling the do_climo_cdp NetCDF output option. * Per #2924, add missing exit statement. * Per #2924, tweak threshold.h * Per #2924, define one perc_thresh_info entry for each enumerated PercThreshType value * Per #2924, simplify the logic for handling percentile threshold types and print a log message once when the old versions are still used. * Per #2924, update the string comparison return value logic * Per #2924, fix the perc thresh string parsing logic by calling ConcatString::startswith() * Per #2924, switch all instances of CDP to OCDP. Gen-Ens-Prod was writing NetCDF files with OCDP in the output variable names, but Grid-Stat was requesting that the wrong variable name be read. So the unit tests failed. * Per #2924, add more doc details * Per #2924, update default config file to indicate when climo_mean and climo_stdev can be set separately in the fcst and obs dictionaries. * Per #2924, update the MET tools to parse climo_mean and climo_stdev separately from the fcst and obs dictionaries. * Per #2924, backing out new/modified columns to minimize reg test diffs * Per #2924, one more section to be commented out later. * Per #2924, replace several calls to strncmp() with ConcatString::startswith() to simplify the code * Per #2924, strip out some more references to OBS_CLIMO_... in the unit tests. * Per #2924, delete accidental file * Per #2924 fix broken XML comments * Per #2924, fix comments * Per #2924, address SonarQube findings * Per #2924, tweak a Point-Stat and Grid-Stat unit test config file to make the output more comparable to develop.
* Per #2924, fix bug in the logic of PairDataPoint and PairDataEnsemble, when looping over the 3-dim array do not return when checking the climo and fcst values. Instead we need to continue to the next loop iteration. * Per #2924, address more SonarQube code smells to reduce the overall number in MET for this PR. * Per #2924, correct the logic for parsing climo data from MPR lines. * Per #2924, cleanup grid_stat.cc source code by making calls to DataPlane::is_empty() and Grid::nxy(). * Per #2924, remove unneeded ==0 * Hotfix to the develop branch for a copy/paste bug introduced by PR #2939 * Feature #2924 sal1l2_mae, PR 3 of 3 (#2943) * Per #2924, track SL1L2 and SAL1L2 MAE scores with separate variables since they are no longer the same value. I renamed the existing 'mae' as 'smae' and added a new 'samae' variable (see the notional equations below). Renaming the existing one lets me use the compiler to help find all references to it throughout the code. * Per #2924, update the User's Guide climatology details and equations. * Per #2924, some changes to aggr_stat_line.cc and series_analysis.cc to satisfy some SonarQube code smells. * Update develop to clarify masking poly options based on METplus Discussion dtcenter/METplus#2650 * Remove two semi-colons that are not actually necessary to avoid confusion. * Per dtcenter/METplus#2653 discussion, update the MTD usage statement to clarify that data specified in the fcst dictionary is read from the -single input files. * Feature #2924 fcst climo, PR 2 of 3 (#2942) * Per #2924, Update the MPR and ORANK output line types to just write duplicate existing climo values, update the header tables and MPR/ORANK documentation tables. * Per #2924, update get_n_orank_columns() logic * Per #2924, update the Stat-Analysis parsing logic to parse the new MPR and ORANK climatology columns. * Per #2924, making some changes to the vx_statistics library to store climo data... but more work to come. Committing this first set of changes that are incomplete but do compile. * Per #2924, this big set of changes does compile but make test produces a segfault for ensemble-stat * Per #2924, fix return value for is_keeper_obs() * Per #2924, move fcst_info/obs_info into the VxPairBase pointer. * Per #2924, update Ensemble-Stat to set the VxPairBase::fcst_info pointer * Per #2924 update handling of fcst_info and obs_info pointers in Ensemble-Stat * Per #2924, update the GSI tools to handle the new fcst climo columns. * Per #2924, add backward compatibility logic so that when old climo column names are requested, the new ones are used. * Per #2924, print a DEBUG(2) log message if old column names are used. * Per #2924, switch the unit tests to reference the updated MPR column names rather than the old ones. * Per #2924, work in progress. Not fully compiling yet * Per #2924, another round of changes. Removing MPR:FCST_CLIMO_CDF output column. This compiles but not sure if it actually runs yet * Per #2924, work in progress * Per #2924, work in progress. Almost compiling again. * Per #2924, get it compiling * Per #2924, add back in support for SCP and CDP which are interpreted as SOCP and OCDP, resp. * Per #2924, update docs about SCP and CDP threshold types * Per #2924, minor whitespace changes * Per #2924, fix an uninitialized pointer bug by defining/calling SeepsClimoGrid::init_from_scratch() member function. The constructor had been calling clear() to delete pointers that weren't properly initialized to nullptr. Also, simplify some map processing logic.
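The sal1l2_mae split noted above (the smae/samae bullet) exists because, once the forecast and observation climatologies may differ, the anomaly MAE no longer reduces to the raw MAE. Notionally, in LaTeX, with assumed symbols c_{f,i} and c_{o,i} for the forecast and observation climo means::

    \mathrm{SMAE}  = \frac{1}{n}\sum_{i=1}^{n} \left| f_i - o_i \right|
    \qquad
    \mathrm{SAMAE} = \frac{1}{n}\sum_{i=1}^{n}
                     \left| (f_i - c_{f,i}) - (o_i - c_{o,i}) \right|

When c_{f,i} = c_{o,i} the two collapse to the same value, which is why a single 'mae' member sufficed before #2924.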
* Per #2924, rename SeepsAggScore from seeps to seeps_agg for clarity and to avoid conflicts in member function implementations. * Per #2924, fix seeps compilation error in Point-Stat * Per #2924, fix bug in the boolean logic for handling the do_climo_cdp NetCDF output option. * Per #2924, add missing exit statement. * Per #2924, tweak threshold.h * Per #2924, define one perc_thresh_info entry for each enumerated PercThreshType value * Per #2924, simplify the logic for handling percentile threshold types and print a log message once when the old versions are still used. * Per #2924, update the string comparison return value logic * Per #2924, fix the perc thresh string parsing logic by calling ConcatString::startswith() * Per #2924, switch all instances of CDP to OCDP. Gen-Ens-Prod was writing NetCDF files with OCDP in the output variable names, but Grid-Stat was requesting that the wrong variable name be read. So the unit tests failed. * Per #2924, add more doc details * Per #2924, update default config file to indicate when climo_mean and climo_stdev can be set separately in the fcst and obs dictionaries. * Per #2924, update the MET tools to parse climo_mean and climo_stdev separately from the fcst and obs dictionaries. * Per #2924, backing out new/modified columns to minimize reg test diffs * Per #2924, one more section to be commented out later. * Per #2924, replace several calls to strncmp() with ConcatString::startswith() to simplify the code * Per #2924, strip out some more references to OBS_CLIMO_... in the unit tests. * Per #2924, delete accidental file * Per #2924 fix broken XML comments * Per #2924, fix comments * Per #2924, address SonarQube findings * Per #2924, tweak a Point-Stat and Grid-Stat unit test config file to make the output more comparable to develop. * Per #2924, fix bug in the logic of PairDataPoint and PairDataEnsemble, when looping over the 3-dim array do not return when checking the climo and fcst values. Instead we need to continue to the next loop iteration. * Per #2924, address more SonarQube code smells to reduce the overall number in MET for this PR. * Per #2924, correct the logic for parsing climo data from MPR lines. * Per #2924, update MPR and ORANK line types to update/add FCST/OBS_CLIMO_MEAN/STDEV/CDF columns. * Per #2924, cleanup grid_stat.cc source code by making calls to DataPlane::is_empty() and Grid::nxy(). * Per #2924, remove unneeded ==0 * Per #2924, working on PR2. * Per #2924, update User's Guide with notional example of specifying climo_mean and climo_stdev separately in the fcst and obs dicts. * Per #2924, adding a new unit test. It does NOT yet run as expected. Will debug on seneca * Per #2924, pass the description string to the read_climo_data_plane*() function to provide better log messages * Per #2924, more work on consistent log messages * Per #2924, tweak the configuration to define both field, climo_mean, and climo_stdev in both the fcst and obs dictionaries * Per #2924, tweak the unit_climatology_mixed.xml test * Per #2924, only whitespace changes. * Per #2924, missed swapping MET #2924 changes in 3 test files * Per #2924, delete accidentally committed file * Per #2924, delete accidentally committed files * Per #2924, add support for GRIB1 time range indicator value of 123 used for the corresponding METplus Use Case. Note that there are 22 other TRI values not currently supported.
* Adds caveat regarding longitudes appearing in DEBUG statements with a… (#2947) * Adds caveat regarding longitudes appearing in DEBUG statements with a different sign to the FAQ. * Update appendixA.rst Missing paren * Create install_met_env.cactus * Adding special script for installing beta5 on wcoss2 * Modifying script, including updates to eckit and atlas * Corrected version of bufr being used * Feature #2938 pb2nc_center_time (#2954) * Per #2938, define CRC_Array::add_uniq(...) member function which is now used in PB2NC * Per #2938, replace n_elements() with n() to make the code more concise. Refine log/warning message when multiple message center times are encountered. * Feature #1371 series_analysis (#2951) * Per #1371, add -input command line argument and add support for ALL for the CTC, MCTC, SL1L2, and PCT line types. * Per #1371, rename the -input command line option as -aggregate instead * Per #1371, work in progress * Per #1371, just comments * Per #1371, working on aggregating CTC counts * Per #1371, work in progress * Per #1371, update timing info using time stamps in the aggr file * Per #1371, close the aggregate data file * Per #1371, define set_event() and set_nonevent() member functions * Per #1371, add logic to aggregate MCTC and PCT counts * Merging changes from develop * Per #1371, work in progress aggregating all the line statistics types. Still have several issues to address * Per #1371, switch to using get_stat() functions * Per #1371, work in progress. More consolidation * Per #1371, correct expected output file name * Per #1371, consistent regridding log messages and fix the Series-Analysis PairDataPoint object handling logic. * Per #1371, check the return status when opening the aggregate file. * Per #1371, fix prc/pjc typo * Per #1371, fix the series_analysis PCT aggregation logic and add a test to unit_series_analysis.xml to demonstrate. * Per #1371, resolve a few SonarQube findings * Per #1371, make use of range-based for loop, as recommended by SonarQube * Per #1371, update series-analysis to apply the valid data threshold properly using the old aggregate data and the new pair data. * Per #1371, update series_analysis to buffer data and write it all at once instead of storing data value by value for each point. * Per #1371, add useful error message when required aggregation variables are not present in the input -aggr file. * Per #1371, print a Debug(2) message listing the aggregation fields being read. * Per #1371, correct operator+= logic in met_stats.cc for SL1L2Info, VL1L2Info, and NBRCNTInfo. The metadata settings, like fthresh and othresh, were not being passed to the output. * Per #1371, the DataPlane for the computed statistics should be initialized to a field of bad data values rather than the default value of 0. Otherwise, 0's are reported for stats at grid points with no data when they should really be reported as bad data! * Per #1371, update logic of the compute_cntinfo() function so that CNT statistics can be derived from a single SL1L2Info object containing both scalar and scalar anomaly partial sums. These changes enable CNT:ANOM_CORR to be aggregated in the Series-Analysis tool. * Per #1371, fix logic of climo log message. * Per #1371, this is actually related to MET #2924. In compute_pctinfo(), use obs climo data first, if provided. And if not, use fcst climo data. * Per #1371, fix indexing bug (+i instead of +1) when checking the valid data count. Also update the logic of read_aggr_total() to return a count of 0 for bad data.
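The #1371 -aggr bullets above fold partial sums from a previous Series-Analysis run into the current one. Aggregating SL1L2-style partial sums amounts to a count-weighted average of the stored means; a minimal sketch with a notional struct (not MET's actual SL1L2Info)::

    // Notional SL1L2 partial sums: means of f, o, f*o, f*f, o*o.
    struct SL1L2 {
       double fbar = 0, obar = 0, fobar = 0, ffbar = 0, oobar = 0;
       int total = 0;
    };

    // Combine two sets of sums by count-weighting each stored mean.
    SL1L2 aggregate(const SL1L2 &a, const SL1L2 &b) {
       SL1L2 s;
       s.total = a.total + b.total;
       if (s.total == 0) return s;
       auto w = [&](double x, double y) {
          return (a.total * x + b.total * y) / s.total;
       };
       s.fbar  = w(a.fbar,  b.fbar);
       s.obar  = w(a.obar,  b.obar);
       s.fobar = w(a.fobar, b.fobar);
       s.ffbar = w(a.ffbar, b.ffbar);
       s.oobar = w(a.oobar, b.oobar);
       return s;
    }

Contingency table counts (CTC, MCTC, PCT) aggregate even more simply, by adding the corresponding cell counts.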
* Per #1371, add logic to aggregate the PSTD BRIERCL and BSS statistics in the do_climo_brier() function. Tested manually to confirm that it works.
* Per #1371, switch to using string literals to satisfy SonarQube
* Per #1371, update series_analysis tests in unit_climatology_1.0deg.xml to demonstrate aggregating climo-based stats.
* Per #1371, remove extra comment
* Per #1371, skip writing the PCT THRESH_i columns to the Series-Analysis output since they are not used
* Per #1371, fix the R string literals to remove \t and \n escape sequences.
* Per #1371, update the read_aggr_data_plane() suggestion strings.
* Per #1371, ignore unneeded PCT 'THRESH_' variables both when reading and writing ALL PCT columns.
* Per #1371, update the test named series_analysis_AGGR_CMD_LINE to include data for the F42 lead time that had previously been included for the same run in the develop branch. Note, however, that the timestamps in the output file for the develop branch (2012040900_to_2012041100) were wrong and have been corrected here (2012040900_to_2012041018) to match the actual data.
* Per #1371, update the -aggr note to warn users about slow runtimes
---------
Co-authored-by: Howard Soh
Co-authored-by: John Halley Gotway
Co-authored-by: Howard Soh
Co-authored-by: MET Tools Test Account
Co-authored-by: davidalbo
Co-authored-by: j-opatz
Co-authored-by: Daniel Adriaansen
Co-authored-by: Julie Prestopnik
Co-authored-by: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Co-authored-by: natalieb-noaa <146213121+natalieb-noaa@users.noreply.github.com>
Co-authored-by: Natalie babij
Co-authored-by: Natalie babij
Co-authored-by: Natalie babij
Co-authored-by: Natalie Babij
Co-authored-by: metplus-bot <97135045+metplus-bot@users.noreply.github.com>
---
 docs/Users_Guide/series-analysis.rst | 15 +-
 .../config/install_met_env.wcoss2 | 2 +-
 .../config/SeriesAnalysisConfig_climo | 10 +-
 .../config/SeriesAnalysisConfig_climo_prob | 2 +-
 .../test_unit/xml/unit_climatology_1.0deg.xml | 96 +-
 .../test_unit/xml/unit_series_analysis.xml | 112 +-
 src/basic/vx_util/crc_array.h | 17 +
 src/libcode/vx_stat_out/stat_columns.cc | 112 +-
 src/libcode/vx_stat_out/stat_columns.h | 3 +
 src/libcode/vx_statistics/compute_stats.cc | 182 +-
 src/libcode/vx_statistics/compute_stats.h | 3 +-
 src/libcode/vx_statistics/contable.h | 7 +
 src/libcode/vx_statistics/contable_nx2.cc | 90 +-
 src/libcode/vx_statistics/met_stats.cc | 1034 +++++++-
 src/libcode/vx_statistics/met_stats.h | 41 +-
 .../core/series_analysis/series_analysis.cc | 2183 +++++++++--------
 .../core/series_analysis/series_analysis.h | 48 +-
 .../core/stat_analysis/aggr_stat_line.cc | 8 +-
 .../stat_analysis/skill_score_index_job.cc | 8 +-
 .../core/stat_analysis/stat_analysis_job.cc | 7 +-
 src/tools/other/pb2nc/pb2nc.cc | 155 +-
 21 files changed, 2653 insertions(+), 1482 deletions(-)
diff --git a/docs/Users_Guide/series-analysis.rst b/docs/Users_Guide/series-analysis.rst index 0be681585f..ed9f5578ab 100644 --- a/docs/Users_Guide/series-analysis.rst +++ b/docs/Users_Guide/series-analysis.rst @@ -33,6 +33,7 @@ The usage statement for the Series-Analysis tool is shown below: -fcst file_1 ... file_n | fcst_file_list -obs file_1 ... file_n | obs_file_list [-both file_1 ... file_n | both_file_list] + [-aggr file] [-paired] -out file -config file @@ -58,13 +59,17 @@ Optional Arguments for series_analysis 5. To set both the forecast and observations to the same set of files, use the optional -both file_1 ... file_n | both_file_list option to the same set of files.
This is useful when reading the NetCDF matched pair output of the Grid-Stat tool which contains both forecast and observation data. -6. The -paired option indicates that the -fcst and -obs file lists are already paired, meaning there is a one-to-one correspondence between the files in those lists. This option affects how missing data is handled. When -paired is not used, missing or incomplete files result in a runtime error with no output file being created. When -paired is used, missing or incomplete files result in a warning with output being created using the available data. +6. The -aggr option specifies the path to an existing Series-Analysis output file. When computing statistics for the input forecast and observation data, Series-Analysis aggregates the partial sums (SL1L2, SAL1L2 line types) and contingency table counts (CTC, MCTC, and PCT line types) with data provided in the aggregate file. This option enables Series-Analysis to run iteratively and update existing partial sums, counts, and statistics with new data. -7. The -log file outputs log messages to the specified file. +.. note:: When the -aggr option is used, only statistics that are derivable from partial sums and contingency table counts can be requested. Runtimes are generally much slower when aggregating data since it requires many additional NetCDF variables containing the scalar partial sums and contingency table counts to be read and written. -8. The -v level overrides the default level of logging (2). +7. The -paired option indicates that the -fcst and -obs file lists are already paired, meaning there is a one-to-one correspondence between the files in those lists. This option affects how missing data is handled. When -paired is not used, missing or incomplete files result in a runtime error with no output file being created. When -paired is used, missing or incomplete files result in a warning with output being created using the available data. -9. The -compress level option indicates the desired level of compression (deflate level) for NetCDF variables. The valid level is between 0 and 9. The value of "level" will override the default setting of 0 from the configuration file or the environment variable MET_NC_COMPRESS. Setting the compression level to 0 will make no compression for the NetCDF output. Lower number is for fast compression and higher number is for better compression. +8. The -log file outputs log messages to the specified file. + +9. The -v level overrides the default level of logging (2). + +10. The -compress level option indicates the desired level of compression (deflate level) for NetCDF variables. The valid level is between 0 and 9. The value of "level" will override the default setting of 0 from the configuration file or the environment variable MET_NC_COMPRESS. Setting the compression level to 0 will make no compression for the NetCDF output. Lower number is for fast compression and higher number is for better compression. An example of the series_analysis calling sequence is shown below: @@ -179,3 +184,5 @@ The output_stats array controls the type of output that the Series-Analysis tool 11. PJC for Joint and Conditional factorization for Probabilistic forecasts (See :numref:`table_PS_format_info_PJC`) 12. PRC for Receiver Operating Characteristic for Probabilistic forecasts (See :numref:`table_PS_format_info_PRC`) + +.. note:: When the -aggr option is used, all partial sum and contingency table count columns are required to aggregate statistics across multiple runs.
To facilitate this, the output_stats entries for the CTC, SL1L2, SAL1L2, and PCT line types can be set to "ALL" to indicate that all available columns for those line types should be written. diff --git a/internal/scripts/installation/config/install_met_env.wcoss2 b/internal/scripts/installation/config/install_met_env.wcoss2 index 2d02be1f87..68e7ef2872 100644 --- a/internal/scripts/installation/config/install_met_env.wcoss2 +++ b/internal/scripts/installation/config/install_met_env.wcoss2 @@ -4,7 +4,7 @@ module load ve/evs/2.0 module use /apps/ops/para/libs/modulefiles/compiler/intel/19.1.3.304 module load netcdf/4.7.4 module load hdf5/1.10.6 -module load bufr/11.5.0 +module load bufr/11.6.0 module load zlib/1.2.11 module load jasper/2.0.25 module load libpng/1.6.37 diff --git a/internal/test_unit/config/SeriesAnalysisConfig_climo b/internal/test_unit/config/SeriesAnalysisConfig_climo index 3728482541..f19bac7a20 100644 --- a/internal/test_unit/config/SeriesAnalysisConfig_climo +++ b/internal/test_unit/config/SeriesAnalysisConfig_climo @@ -132,13 +132,13 @@ vld_thresh = 0.5; // output_stats = { fho = [ "TOTAL", "F_RATE", "H_RATE", "O_RATE" ]; - ctc = [ ]; + ctc = [ "ALL" ]; cts = [ ]; mctc = [ ]; - mcts = [ "ACC" ]; - cnt = [ "TOTAL", "RMSE", "ANOM_CORR" ]; - sl1l2 = [ ]; - sal1l2 = [ ]; + mcts = [ ]; + cnt = [ "TOTAL", "RMSE", "ANOM_CORR", "RMSFA", "RMSOA" ]; + sl1l2 = [ "ALL" ]; + sal1l2 = [ "ALL" ]; pct = [ ]; pstd = [ ]; pjc = [ ]; diff --git a/internal/test_unit/config/SeriesAnalysisConfig_climo_prob b/internal/test_unit/config/SeriesAnalysisConfig_climo_prob index 8b55c508d3..149062dc41 100644 --- a/internal/test_unit/config/SeriesAnalysisConfig_climo_prob +++ b/internal/test_unit/config/SeriesAnalysisConfig_climo_prob @@ -148,7 +148,7 @@ output_stats = { cnt = [ ]; sl1l2 = [ ]; sal1l2 = [ ]; - pct = [ ]; + pct = [ "ALL" ]; pstd = [ "TOTAL", "ROC_AUC", "BRIER", "BRIERCL", "BSS", "BSS_SMPL" ]; pjc = [ ]; prc = [ ]; diff --git a/internal/test_unit/xml/unit_climatology_1.0deg.xml b/internal/test_unit/xml/unit_climatology_1.0deg.xml index a07d47ff6e..fcd6b59668 100644 --- a/internal/test_unit/xml/unit_climatology_1.0deg.xml +++ b/internal/test_unit/xml/unit_climatology_1.0deg.xml @@ -154,20 +154,18 @@ &OUTPUT_DIR;/climatology_1.0deg/stat_analysis_MPR_to_PSTD.stat - +--!> &MET_BIN;/series_analysis CLIMO_MEAN_FILE_LIST "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cmean_1d.19590409", - "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cmean_1d.19590410", - "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cmean_1d.19590411" + "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cmean_1d.19590410" CLIMO_STDEV_FILE_LIST "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cstdv_1d.19590409", - "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cstdv_1d.19590410", - "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cstdv_1d.19590411" + "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cstdv_1d.19590410" @@ -175,11 +173,9 @@ -fcst &DATA_DIR_MODEL;/grib2/gfs/gfs_2012040900_F012.grib2 \ &DATA_DIR_MODEL;/grib2/gfs/gfs_2012040900_F024.grib2 \ &DATA_DIR_MODEL;/grib2/gfs/gfs_2012040900_F036.grib2 \ - &DATA_DIR_MODEL;/grib2/gfs/gfs_2012040900_F048.grib2 \ -obs &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120409_1200_000.grb2 \ &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120410_0000_000.grb2 \ &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120410_1200_000.grb2 \ - &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120411_0000_000.grb2 \ -paired \ -out &OUTPUT_DIR;/climatology_1.0deg/series_analysis_GFS_CLIMO_1.0DEG.nc \ -config &CONFIG_DIR;/SeriesAnalysisConfig_climo \ @@ -190,25 +186,84 @@ + + 
&MET_BIN;/series_analysis + + CLIMO_MEAN_FILE_LIST + "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cmean_1d.19590411" + + + CLIMO_STDEV_FILE_LIST + "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cstdv_1d.19590411" + + + + \ + -fcst &DATA_DIR_MODEL;/grib2/gfs/gfs_2012040900_F048.grib2 \ + -obs &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120411_0000_000.grb2 \ + -paired \ + -aggr &OUTPUT_DIR;/climatology_1.0deg/series_analysis_GFS_CLIMO_1.0DEG.nc \ + -out &OUTPUT_DIR;/climatology_1.0deg/series_analysis_GFS_CLIMO_1.0DEG_AGGR.nc \ + -config &CONFIG_DIR;/SeriesAnalysisConfig_climo \ + -v 2 + + + &OUTPUT_DIR;/climatology_1.0deg/series_analysis_GFS_CLIMO_1.0DEG_AGGR.nc + + + echo "&DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F003.grib2 \ &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F009.grib2 \ &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F015.grib2 \ - &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F021.grib2 \ - &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F027.grib2 \ - &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F033.grib2 \ - &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F039.grib2 \ - &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F045.grib2" \ - > &OUTPUT_DIR;/climatology_1.0deg/input_fcst_file_list; \ + &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F021.grib2" \ + > &OUTPUT_DIR;/climatology_1.0deg/20120409_fcst_file_list; \ echo "&DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120409_0000_000.grb2 \ &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120409_0600_000.grb2 \ &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120409_1200_000.grb2 \ - &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120409_1800_000.grb2 \ - &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120410_0000_000.grb2 \ + &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120409_1800_000.grb2" \ + > &OUTPUT_DIR;/climatology_1.0deg/20120409_obs_file_list; \ + &MET_BIN;/series_analysis + + DAY_INTERVAL 1 + HOUR_INTERVAL 6 + CLIMO_MEAN_FILE_LIST + "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cmean_1d.19590409", + "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cmean_1d.19590410", + "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cmean_1d.19590411" + + + CLIMO_STDEV_FILE_LIST + "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cstdv_1d.19590409", + "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cstdv_1d.19590410", + "&DATA_DIR_CLIMO;/NCEP_NCAR_40YR_1.0deg/cstdv_1d.19590411" + + + + \ + -fcst &OUTPUT_DIR;/climatology_1.0deg/20120409_fcst_file_list \ + -obs &OUTPUT_DIR;/climatology_1.0deg/20120409_obs_file_list \ + -paired \ + -out &OUTPUT_DIR;/climatology_1.0deg/series_analysis_PROB_CLIMO_1.0DEG.nc \ + -config &CONFIG_DIR;/SeriesAnalysisConfig_climo_prob \ + -v 2 + + + &OUTPUT_DIR;/climatology_1.0deg/series_analysis_PROB_CLIMO_1.0DEG.nc + + + + + echo "&DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F027.grib2 \ + &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F033.grib2 \ + &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F039.grib2 \ + &DATA_DIR_MODEL;/grib2/sref_pr/sref_prob_2012040821_F045.grib2" \ + > &OUTPUT_DIR;/climatology_1.0deg/20120410_fcst_file_list; \ + echo "&DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120410_0000_000.grb2 \ &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120410_0600_000.grb2 \ &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120410_1200_000.grb2 \ &DATA_DIR_MODEL;/grib2/gfsanl/gfsanl_4_20120410_1800_000.grb2" \ - > &OUTPUT_DIR;/climatology_1.0deg/input_obs_file_list; \ + > &OUTPUT_DIR;/climatology_1.0deg/20120410_obs_file_list; \ &MET_BIN;/series_analysis DAY_INTERVAL 1 @@ -227,15 +282,16 @@ \ - -fcst 
&OUTPUT_DIR;/climatology_1.0deg/input_fcst_file_list \ - -obs &OUTPUT_DIR;/climatology_1.0deg/input_obs_file_list \ + -fcst &OUTPUT_DIR;/climatology_1.0deg/20120410_fcst_file_list \ + -obs &OUTPUT_DIR;/climatology_1.0deg/20120410_obs_file_list \ -paired \ - -out &OUTPUT_DIR;/climatology_1.0deg/series_analysis_PROB_CLIMO_1.0DEG.nc \ + -aggr &OUTPUT_DIR;/climatology_1.0deg/series_analysis_PROB_CLIMO_1.0DEG.nc \ + -out &OUTPUT_DIR;/climatology_1.0deg/series_analysis_PROB_CLIMO_1.0DEG_AGGR.nc \ -config &CONFIG_DIR;/SeriesAnalysisConfig_climo_prob \ -v 2 - &OUTPUT_DIR;/climatology_1.0deg/series_analysis_PROB_CLIMO_1.0DEG.nc + &OUTPUT_DIR;/climatology_1.0deg/series_analysis_PROB_CLIMO_1.0DEG_AGGR.nc diff --git a/internal/test_unit/xml/unit_series_analysis.xml b/internal/test_unit/xml/unit_series_analysis.xml index c1e64416b3..96cda729dd 100644 --- a/internal/test_unit/xml/unit_series_analysis.xml +++ b/internal/test_unit/xml/unit_series_analysis.xml @@ -29,12 +29,12 @@ OBS_FIELD { name = "APCP"; level = [ "A06" ]; } MASK_POLY FHO_STATS "F_RATE", "O_RATE" - CTC_STATS "FY_OY", "FN_ON" + CTC_STATS "ALL" CTS_STATS "CSI", "GSS" - MCTC_STATS "F1_O1", "F2_O2", "F3_O3" + MCTC_STATS "ALL" MCTS_STATS "ACC", "ACC_NCL", "ACC_NCU" CNT_STATS "TOTAL", "ME", "ME_NCL", "ME_NCU" - SL1L2_STATS "FBAR", "OBAR" + SL1L2_STATS "ALL" SAL1L2_STATS PCT_STATS PSTD_STATS @@ -46,22 +46,56 @@ &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F012.grib \ &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F018.grib \ &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F024.grib \ - &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F030.grib \ - &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F036.grib \ - &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F042.grib \ -obs &DATA_DIR_OBS;/stage4_hmt/stage4_2012040906_06h.grib \ &DATA_DIR_OBS;/stage4_hmt/stage4_2012040912_06h.grib \ &DATA_DIR_OBS;/stage4_hmt/stage4_2012040918_06h.grib \ &DATA_DIR_OBS;/stage4_hmt/stage4_2012041000_06h.grib \ - &DATA_DIR_OBS;/stage4_hmt/stage4_2012041006_06h.grib \ + -out &OUTPUT_DIR;/series_analysis/series_analysis_CMD_LINE_APCP_06_2012040900_to_2012041000.nc \ + -config &CONFIG_DIR;/SeriesAnalysisConfig \ + -v 1 + + + &OUTPUT_DIR;/series_analysis/series_analysis_CMD_LINE_APCP_06_2012040900_to_2012041000.nc + + + + + &MET_BIN;/series_analysis + + MODEL GFS + OBTYPE STAGE4 + FCST_CAT_THRESH >0.0, >5.0 + FCST_FIELD { name = "APCP"; level = [ "A06" ]; } + OBS_CAT_THRESH >0.0, >5.0 + OBS_FIELD { name = "APCP"; level = [ "A06" ]; } + MASK_POLY + FHO_STATS "F_RATE", "O_RATE" + CTC_STATS "ALL" + CTS_STATS "CSI", "GSS" + MCTC_STATS "ALL" + MCTS_STATS "ACC", "ACC_NCL", "ACC_NCU" + CNT_STATS "TOTAL", "ME", "ME_NCL", "ME_NCU" + SL1L2_STATS "ALL" + SAL1L2_STATS + PCT_STATS + PSTD_STATS + PJC_STATS + PRC_STATS + + \ + -fcst &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F030.grib \ + &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F036.grib \ + &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F042.grib \ + -obs &DATA_DIR_OBS;/stage4_hmt/stage4_2012041006_06h.grib \ &DATA_DIR_OBS;/stage4_hmt/stage4_2012041012_06h.grib \ &DATA_DIR_OBS;/stage4_hmt/stage4_2012041018_06h.grib \ - -out &OUTPUT_DIR;/series_analysis/series_analysis_CMD_LINE_APCP_06_2012040900_to_2012041100.nc \ + -aggr &OUTPUT_DIR;/series_analysis/series_analysis_CMD_LINE_APCP_06_2012040900_to_2012041000.nc \ + -out &OUTPUT_DIR;/series_analysis/series_analysis_AGGR_CMD_LINE_APCP_06_2012040900_to_2012041018.nc \ -config &CONFIG_DIR;/SeriesAnalysisConfig \ -v 1 - 
&OUTPUT_DIR;/series_analysis/series_analysis_CMD_LINE_APCP_06_2012040900_to_2012041100.nc + &OUTPUT_DIR;/series_analysis/series_analysis_AGGR_CMD_LINE_APCP_06_2012040900_to_2012041018.nc @@ -69,18 +103,12 @@ echo "&DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F009.grib \ &DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F015.grib \ &DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F021.grib \ - &DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F027.grib \ - &DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F033.grib \ - &DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F039.grib \ - &DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F045.grib" \ + &DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F027.grib" \ > &OUTPUT_DIR;/series_analysis/input_fcst_file_list; \ echo "&DATA_DIR_OBS;/stage4_hmt/stage4_2012040906_06h.grib \ &DATA_DIR_OBS;/stage4_hmt/stage4_2012040912_06h.grib \ &DATA_DIR_OBS;/stage4_hmt/stage4_2012040918_06h.grib \ - &DATA_DIR_OBS;/stage4_hmt/stage4_2012041000_06h.grib \ - &DATA_DIR_OBS;/stage4_hmt/stage4_2012041006_06h.grib \ - &DATA_DIR_OBS;/stage4_hmt/stage4_2012041012_06h.grib \ - &DATA_DIR_OBS;/stage4_hmt/stage4_2012041018_06h.grib" \ + &DATA_DIR_OBS;/stage4_hmt/stage4_2012041000_06h.grib" \ > &OUTPUT_DIR;/series_analysis/input_obs_file_list; \ &MET_BIN;/series_analysis @@ -99,7 +127,7 @@ CNT_STATS SL1L2_STATS SAL1L2_STATS - PCT_STATS "OY_1", "ON_1" + PCT_STATS "ALL" PSTD_STATS "TOTAL", "ROC_AUC", "BRIER", "BRIER_NCL", "BRIER_NCU" PJC_STATS "CALIBRATION_1", "REFINEMENT_1" PRC_STATS "PODY_1", "POFD_1" @@ -107,12 +135,56 @@ \ -fcst &OUTPUT_DIR;/series_analysis/input_fcst_file_list \ -obs &OUTPUT_DIR;/series_analysis/input_obs_file_list \ - -out &OUTPUT_DIR;/series_analysis/series_analysis_FILE_LIST_PROB_APCP_06_2012040900_to_2012041100.nc \ + -out &OUTPUT_DIR;/series_analysis/series_analysis_FILE_LIST_PROB_APCP_06_2012040900_to_2012041000.nc \ + -config &CONFIG_DIR;/SeriesAnalysisConfig \ + -v 1 + + + &OUTPUT_DIR;/series_analysis/series_analysis_FILE_LIST_PROB_APCP_06_2012040900_to_2012041000.nc + + + + + echo "&DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F033.grib \ + &DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F039.grib \ + &DATA_DIR_MODEL;/grib1/sref/sref_2012040821_F045.grib" \ + > &OUTPUT_DIR;/series_analysis/aggregate_fcst_file_list; \ + echo "&DATA_DIR_OBS;/stage4_hmt/stage4_2012041006_06h.grib \ + &DATA_DIR_OBS;/stage4_hmt/stage4_2012041012_06h.grib \ + &DATA_DIR_OBS;/stage4_hmt/stage4_2012041018_06h.grib" \ + > &OUTPUT_DIR;/series_analysis/aggregate_obs_file_list; \ + &MET_BIN;/series_analysis + + MODEL SREF + OBTYPE STAGE4 + FCST_CAT_THRESH >=0.00, >=0.25, >=0.50, >=0.75, >=1.00 + FCST_FIELD { name = "PROB"; level = "A06"; prob = { name = "APCP"; thresh_lo = 0.25; }; } + OBS_CAT_THRESH >0.25 + OBS_FIELD { name = "APCP"; level = "A06"; } + MASK_POLY + FHO_STATS + CTC_STATS + CTS_STATS + MCTC_STATS + MCTS_STATS + CNT_STATS + SL1L2_STATS + SAL1L2_STATS + PCT_STATS "ALL" + PSTD_STATS "TOTAL", "ROC_AUC", "BRIER", "BRIER_NCL", "BRIER_NCU" + PJC_STATS "CALIBRATION_1", "REFINEMENT_1" + PRC_STATS "PODY_1", "POFD_1" + + \ + -fcst &OUTPUT_DIR;/series_analysis/aggregate_fcst_file_list \ + -obs &OUTPUT_DIR;/series_analysis/aggregate_obs_file_list \ + -aggr &OUTPUT_DIR;/series_analysis/series_analysis_FILE_LIST_PROB_APCP_06_2012040900_to_2012041000.nc \ + -out &OUTPUT_DIR;/series_analysis/series_analysis_AGGR_FILE_LIST_PROB_APCP_06_2012040900_to_2012041018.nc \ -config &CONFIG_DIR;/SeriesAnalysisConfig \ -v 1 - &OUTPUT_DIR;/series_analysis/series_analysis_FILE_LIST_PROB_APCP_06_2012040900_to_2012041100.nc + 
&OUTPUT_DIR;/series_analysis/series_analysis_AGGR_FILE_LIST_PROB_APCP_06_2012040900_to_2012041018.nc diff --git a/src/basic/vx_util/crc_array.h b/src/basic/vx_util/crc_array.h index 02805d1bf5..6a8841afce 100644 --- a/src/basic/vx_util/crc_array.h +++ b/src/basic/vx_util/crc_array.h @@ -108,6 +108,7 @@ class CRC_Array { void add(const T &); void add(const CRC_Array &); + void add_uniq(const T &, bool forward=true); void add_css_sec(const char *); void set(const T & val); @@ -509,6 +510,22 @@ return; //////////////////////////////////////////////////////////////////////// +template + +void CRC_Array::add_uniq(const T & k, bool forward) + +{ + +if ( !has(k, forward) ) add(k); + +return; + +} + + +//////////////////////////////////////////////////////////////////////// + + template void CRC_Array::add_css_sec(const char * text) diff --git a/src/libcode/vx_stat_out/stat_columns.cc b/src/libcode/vx_stat_out/stat_columns.cc index 51992d7b50..f4ccb6354b 100644 --- a/src/libcode/vx_stat_out/stat_columns.cc +++ b/src/libcode/vx_stat_out/stat_columns.cc @@ -122,32 +122,18 @@ void write_header_row(const char * const * cols, int n_cols, int hdr_flag, void write_mctc_header_row(int hdr_flag, int n_cat, AsciiTable &at, int r, int c) { - int i, j, col; - ConcatString cs; // Write the header column names if requested if(hdr_flag) { - for(i=0; i= 1) { - tmp_str.format("%s%i", pct_columns[2], n_thresh); - at.set_entry(r, col, tmp_str); // Threshold - } + StringArray sa(get_pct_columns(n_thresh)); + for(int i=0; i= 1) { + cs.format("%s%i", pct_columns[2], n_thresh); + sa.add(cs); + } + + return sa; +} + + //////////////////////////////////////////////////////////////////////// void write_fho_row(StatHdrColumns &shc, const CTSInfo &cts_info, @@ -4608,7 +4630,7 @@ void write_ssvar_cols(const PairDataEnsemble *pd_ptr, int i, // cnt_info.allocate_n_alpha(1); cnt_info.alpha[0] = alpha; - compute_cntinfo(pd_ptr->ssvar_bins[i].sl1l2_info, 0, cnt_info); + compute_cntinfo(pd_ptr->ssvar_bins[i].sl1l2_info, cnt_info); // // Ensemble spread/skill variance bins diff --git a/src/libcode/vx_stat_out/stat_columns.h b/src/libcode/vx_stat_out/stat_columns.h index 8af99ad730..7eb21c6fa0 100644 --- a/src/libcode/vx_stat_out/stat_columns.h +++ b/src/libcode/vx_stat_out/stat_columns.h @@ -49,6 +49,9 @@ extern void write_phist_header_row (int, int, AsciiTable &, int, int); extern void write_orank_header_row (int, int, AsciiTable &, int, int); extern void write_relp_header_row (int, int, AsciiTable &, int, int); +extern StringArray get_mctc_columns (int); +extern StringArray get_pct_columns (int); + extern void write_fho_row (StatHdrColumns &, const CTSInfo &, STATOutputType, AsciiTable &, int &, AsciiTable &, int &); extern void write_ctc_row (StatHdrColumns &, const CTSInfo &, STATOutputType, diff --git a/src/libcode/vx_statistics/compute_stats.cc b/src/libcode/vx_statistics/compute_stats.cc index bbc9e0ac1a..c0d988efd3 100644 --- a/src/libcode/vx_statistics/compute_stats.cc +++ b/src/libcode/vx_statistics/compute_stats.cc @@ -27,112 +27,97 @@ using namespace std; const int detailed_debug_level = 5; - //////////////////////////////////////////////////////////////////////// -void compute_cntinfo(const SL1L2Info &s, bool aflag, CNTInfo &cnt_info) { - double fbar, obar, ffbar, fobar, oobar, den; - int n; +void compute_cntinfo(const SL1L2Info &s, CNTInfo &cnt_info) { + + // Initialize statistics + cnt_info.zero_out(); - // Set the quantities that can't be derived from SL1L2Info to bad data - cnt_info.sp_corr.set_bad_data(); - 
cnt_info.kt_corr.set_bad_data(); - cnt_info.e10.set_bad_data(); - cnt_info.e25.set_bad_data(); - cnt_info.e50.set_bad_data(); - cnt_info.e75.set_bad_data(); - cnt_info.e90.set_bad_data(); - cnt_info.eiqr.set_bad_data(); - cnt_info.mad.set_bad_data(); - cnt_info.n_ranks = 0; - cnt_info.frank_ties = 0; - cnt_info.orank_ties = 0; - - // Get partial sums - n = (aflag ? s.sacount : s.scount); - fbar = (aflag ? s.fabar : s.fbar); - obar = (aflag ? s.oabar : s.obar); - fobar = (aflag ? s.foabar : s.fobar); - ffbar = (aflag ? s.ffabar : s.ffbar); - oobar = (aflag ? s.ooabar : s.oobar); + // Check for consistent counts + if(s.scount > 0 && s.sacount > 0 && + s.scount != s.sacount) { + mlog << Error << "\ncompute_cntinfo() -> " + << "the scalar partial sum and scalar anomaly partial sum " + << "counts are both non-zero but do not match (" + << s.scount << " != " << s.sacount << ").\n\n"; + exit(1); + } // Number of matched pairs + int n = max(s.scount, s.sacount); cnt_info.n = n; - // Forecast mean and standard deviation - cnt_info.fbar.v = fbar; - cnt_info.fstdev.v = compute_stdev(fbar*n, ffbar*n, n); - - // Observation mean and standard deviation - cnt_info.obar.v = obar; - cnt_info.ostdev.v = compute_stdev(obar*n, oobar*n, n); - - // Multiplicative bias - cnt_info.mbias.v = (is_eq(obar, 0.0) ? bad_data_double : fbar/obar); - - // Correlation coefficient - - // Handle SAL1L2 data - if(aflag) { - cnt_info.pr_corr.v = bad_data_double; - cnt_info.anom_corr.v = compute_corr( fbar*n, obar*n, - ffbar*n, oobar*n, - fobar*n, n); - cnt_info.rmsfa.v = sqrt(ffbar); - cnt_info.rmsoa.v = sqrt(oobar); - cnt_info.anom_corr_uncntr.v = compute_anom_corr_uncntr(ffbar, oobar, - fobar); - } - // Handle SL1L2 data - else { - cnt_info.pr_corr.v = compute_corr( fbar*n, obar*n, - ffbar*n, oobar*n, - fobar*n, n); - cnt_info.anom_corr.v = bad_data_double; - cnt_info.rmsfa.v = bad_data_double; - cnt_info.rmsoa.v = bad_data_double; - cnt_info.anom_corr_uncntr.v = bad_data_double; - } + // Process scalar partial sum statistics + if(s.scount > 0) { - // Compute mean error - cnt_info.me.v = fbar - obar; + // Forecast mean and standard deviation + cnt_info.fbar.v = s.fbar; + cnt_info.fstdev.v = compute_stdev(s.fbar*n, s.ffbar*n, n); - // Compute mean error squared - cnt_info.me2.v = cnt_info.me.v * cnt_info.me.v; + // Observation mean and standard deviation + cnt_info.obar.v = s.obar; + cnt_info.ostdev.v = compute_stdev(s.obar*n, s.oobar*n, n); - // Compute mean absolute error - cnt_info.mae.v = s.smae; + // Multiplicative bias + cnt_info.mbias.v = (is_eq(s.obar, 0.0) ? 
bad_data_double : s.fbar/s.obar); - // Compute mean squared error - cnt_info.mse.v = ffbar + oobar - 2.0*fobar; + // Correlation coefficient + cnt_info.pr_corr.v = compute_corr( s.fbar*n, s.obar*n, + s.ffbar*n, s.oobar*n, + s.fobar*n, n); - // Compute mean squared error skill score - den = cnt_info.ostdev.v * cnt_info.ostdev.v; - if(!is_eq(den, 0.0)) { - cnt_info.msess.v = 1.0 - cnt_info.mse.v / den; - } - else { - cnt_info.msess.v = bad_data_double; - } + // Compute mean error + cnt_info.me.v = s.fbar - s.obar; - // Compute standard deviation of the mean error - cnt_info.estdev.v = compute_stdev(cnt_info.me.v*n, - cnt_info.mse.v*n, n); + // Compute mean error squared + cnt_info.me2.v = cnt_info.me.v * cnt_info.me.v; - // Compute bias corrected mean squared error (decomposition of MSE) - cnt_info.bcmse.v = cnt_info.mse.v - (fbar - obar)*(fbar - obar); + // Compute mean absolute error + cnt_info.mae.v = s.smae; - // Compute root mean squared error - cnt_info.rmse.v = sqrt(cnt_info.mse.v); + // Compute mean squared error + cnt_info.mse.v = s.ffbar + s.oobar - 2.0*s.fobar; - // Compute Scatter Index (SI) - if(!is_eq(cnt_info.obar.v, 0.0)) { - cnt_info.si.v = cnt_info.rmse.v / cnt_info.obar.v; + // Compute mean squared error skill score + double den = cnt_info.ostdev.v * cnt_info.ostdev.v; + if(!is_eq(den, 0.0)) { + cnt_info.msess.v = 1.0 - cnt_info.mse.v / den; + } + else { + cnt_info.msess.v = bad_data_double; + } + + // Compute standard deviation of the mean error + cnt_info.estdev.v = compute_stdev(cnt_info.me.v*n, + cnt_info.mse.v*n, n); + + // Compute bias corrected mean squared error (decomposition of MSE) + cnt_info.bcmse.v = cnt_info.mse.v - (s.fbar - s.obar)*(s.fbar - s.obar); + + // Compute root mean squared error + cnt_info.rmse.v = sqrt(cnt_info.mse.v); + + // Compute Scatter Index (SI) + if(!is_eq(cnt_info.obar.v, 0.0)) { + cnt_info.si.v = cnt_info.rmse.v / cnt_info.obar.v; + } + else { + cnt_info.si.v = bad_data_double; + } } - else { - cnt_info.si.v = bad_data_double; + + // Process scalar anomaly partial sum statistics + if(s.sacount > 0) { + cnt_info.anom_corr.v = compute_corr( s.fabar*n, s.oabar*n, + s.ffabar*n, s.ooabar*n, + s.foabar*n, n); + cnt_info.rmsfa.v = sqrt(s.ffabar); + cnt_info.rmsoa.v = sqrt(s.ooabar); + cnt_info.anom_corr_uncntr.v = compute_anom_corr_uncntr(s.ffabar, s.ooabar, + s.foabar); } - + // Compute normal confidence intervals cnt_info.compute_ci(); @@ -772,10 +757,23 @@ void compute_pctinfo(const PairDataPoint &pd, bool pstd_flag, // Use input climatological probabilities or derive them if(cmn_flag) { - if(cprob_in) climo_prob = *cprob_in; - else climo_prob = derive_climo_prob(pd.cdf_info_ptr, - pd.ocmn_na, pd.ocsd_na, - pct_info.othresh); + + // Use climatological probabilities directly, if supplied + if(cprob_in) { + climo_prob = *cprob_in; + } + // Use observation climatology data, if available + else if(pd.ocmn_na.n() > 0) { + climo_prob = derive_climo_prob(pd.cdf_info_ptr, + pd.ocmn_na, pd.ocsd_na, + pct_info.othresh); + } + // Otherwise, try using forecast climatology data + else { + climo_prob = derive_climo_prob(pd.cdf_info_ptr, + pd.fcmn_na, pd.fcsd_na, + pct_info.othresh); + } } + // diff --git a/src/libcode/vx_statistics/compute_stats.h b/src/libcode/vx_statistics/compute_stats.h index 556afcd1e8..1649cdcec2 100644 --- a/src/libcode/vx_statistics/compute_stats.h +++ b/src/libcode/vx_statistics/compute_stats.h @@ -24,8 +24,7 @@ // //////////////////////////////////////////////////////////////////////// -extern void compute_cntinfo(const SL1L2Info
&, bool, CNTInfo &); - +extern void compute_cntinfo(const SL1L2Info &, CNTInfo &); extern void compute_cntinfo(const PairDataPoint &, const NumArray &, bool, bool, bool, CNTInfo &); extern void compute_i_cntinfo(const PairDataPoint &, int, diff --git a/src/libcode/vx_statistics/contable.h b/src/libcode/vx_statistics/contable.h index cae46f880a..8c7823c4d7 100644 --- a/src/libcode/vx_statistics/contable.h +++ b/src/libcode/vx_statistics/contable.h @@ -193,6 +193,13 @@ class Nx2ContingencyTable : public ContingencyTable { double threshold(int index) const; // 0 <= index <= Nrows + // + // set counts + // + + void set_event(int row, int count); + void set_nonevent(int row, int count); + // // increment counts // diff --git a/src/libcode/vx_statistics/contable_nx2.cc b/src/libcode/vx_statistics/contable_nx2.cc index 9af0358511..c69ec38545 100644 --- a/src/libcode/vx_statistics/contable_nx2.cc +++ b/src/libcode/vx_statistics/contable_nx2.cc @@ -187,7 +187,8 @@ void Nx2ContingencyTable::set_size(int NR, int NC) if ( NC != 2 ) { - mlog << Error << "\nNx2ContingencyTable::set_size(int, int) -> must have 2 columns!\n\n"; + mlog << Error << "\nNx2ContingencyTable::set_size(int, int) -> " + << "must have 2 columns!\n\n"; exit ( 1 ); @@ -209,7 +210,8 @@ int Nx2ContingencyTable::value_to_row(double t) const if ( !Thresholds ) { - mlog << Error << "\nNx2ContingencyTable::value_to_row(double) const -> thresholds array not set!\n\n"; + mlog << Error << "\nNx2ContingencyTable::value_to_row(double) const -> " + << "thresholds array not set!\n\n"; exit ( 1 ); @@ -246,7 +248,8 @@ void Nx2ContingencyTable::set_thresholds(const double * Values) if ( E->empty() ) { - mlog << Error << "\nNx2ContingencyTable::set_thresholds(const double *) -> table empty!\n\n"; + mlog << Error << "\nNx2ContingencyTable::set_thresholds(const double *) -> " + << "table empty!\n\n"; exit ( 1 ); @@ -272,7 +275,8 @@ double Nx2ContingencyTable::threshold(int k) const if ( !Thresholds ) { - mlog << Error << "\nNx2ContingencyTable::threshold(int) const -> no thresholds set!\n\n"; + mlog << Error << "\nNx2ContingencyTable::threshold(int) const -> " + << "no thresholds set!\n\n"; exit ( 1 ); @@ -280,7 +284,8 @@ if ( !Thresholds ) { if ( (k < 0) || (k > Nrows) ) { // there are Nrows + 1 thresholds - mlog << Error << "\nNx2ContingencyTable::threshold(int) const -> range check error\n\n"; + mlog << Error << "\nNx2ContingencyTable::threshold(int) const -> " + << "range check error\n\n"; exit ( 1 ); @@ -290,6 +295,51 @@ return Thresholds[k]; } +//////////////////////////////////////////////////////////////////////// + + +void Nx2ContingencyTable::set_event(int row, int value) + +{ + +if ( row < 0 || row >= Nrows ) { + + mlog << Error << "\nNx2ContingencyTable::set_event(double) -> " + << "bad row index ... " << row << "\n\n"; + + exit ( 1 ); + +} + +set_entry(row, nx2_event_column, value); + +return; + +} + + +//////////////////////////////////////////////////////////////////////// + + +void Nx2ContingencyTable::set_nonevent(int row, int value) + +{ + +if ( row < 0 || row >= Nrows ) { + + mlog << Error << "\nNx2ContingencyTable::set_nonevent(double) -> " + << "bad row index ... " << row << "\n\n"; + + exit ( 1 ); + +} + +set_entry(row, nx2_nonevent_column, value); + +return; + +} + //////////////////////////////////////////////////////////////////////// @@ -304,7 +354,8 @@ r = value_to_row(t); if ( r < 0 ) { - mlog << Error << "\nNx2ContingencyTable::inc_event(double) -> bad value ... 
" << t << "\n\n"; + mlog << Error << "\nNx2ContingencyTable::inc_event(double) -> " + << "bad value ... " << t << "\n\n"; exit ( 1 ); @@ -330,7 +381,8 @@ r = value_to_row(t); if ( r < 0 ) { - mlog << Error << "\nNx2ContingencyTable::inc_nonevent(double) -> bad value ... " << t << "\n\n"; + mlog << Error << "\nNx2ContingencyTable::inc_nonevent(double) -> " + << "bad value ... " << t << "\n\n"; exit ( 1 ); @@ -356,7 +408,8 @@ r = value_to_row(t); if ( r < 0 ) { - mlog << Error << "\nNx2ContingencyTable::event_count_by_thresh(double) -> bad value ... " << t << "\n\n"; + mlog << Error << "\nNx2ContingencyTable::event_count_by_thresh(double) -> " + << "bad value ... " << t << "\n\n"; exit ( 1 ); @@ -384,7 +437,8 @@ r = value_to_row(t); if ( r < 0 ) { - mlog << Error << "\nNx2ContingencyTable::nonevent_count_by_thresh(double) -> bad value ... " << t << "\n\n"; + mlog << Error << "\nNx2ContingencyTable::nonevent_count_by_thresh(double) -> " + << "bad value ... " << t << "\n\n"; exit ( 1 ); @@ -446,7 +500,8 @@ double Nx2ContingencyTable::row_proby(int row) const if ( (row < 0) || (row >= Nrows) ) { - mlog << Error << "\nNx2ContingencyTable::row_proby(int) const -> range check error\n\n"; + mlog << Error << "\nNx2ContingencyTable::row_proby(int) const -> " + << "range check error\n\n"; exit ( 1 ); @@ -693,7 +748,8 @@ double Nx2ContingencyTable::row_calibration(int row) const if ( (row < 0) || (row >= Nrows) ) { - mlog << Error << "\nNx2ContingencyTable::row_calibration(int) const -> range check error\n\n"; + mlog << Error << "\nNx2ContingencyTable::row_calibration(int) const -> " + << "range check error\n\n"; exit ( 1 ); @@ -723,7 +779,8 @@ double Nx2ContingencyTable::row_refinement(int row) const if ( (row < 0) || (row >= Nrows) ) { - mlog << Error << "\nNx2ContingencyTable::row_refinement(int) const -> range check error\n\n"; + mlog << Error << "\nNx2ContingencyTable::row_refinement(int) const -> " + << "range check error\n\n"; exit ( 1 ); @@ -755,7 +812,8 @@ double Nx2ContingencyTable::row_event_likelihood(int row) const if ( (row < 0) || (row >= Nrows) ) { - mlog << Error << "\nNx2ContingencyTable::row_event_likelihood(int) const -> range check error\n\n"; + mlog << Error << "\nNx2ContingencyTable::row_event_likelihood(int) const -> " + << "range check error\n\n"; exit ( 1 ); @@ -784,7 +842,8 @@ double Nx2ContingencyTable::row_nonevent_likelihood(int row) const if ( (row < 0) || (row >= Nrows) ) { - mlog << Error << "\nNx2ContingencyTable::row_nonevent_likelihood(int) const -> range check error\n\n"; + mlog << Error << "\nNx2ContingencyTable::row_nonevent_likelihood(int) const -> " + << "range check error\n\n"; exit ( 1 ); @@ -815,7 +874,8 @@ TTContingencyTable tt; if ( (row < 0) || (row >= Nrows) ) { - mlog << Error << "\nNx2ContingencyTable::ctc_by_row(int) const -> range check error\n\n"; + mlog << Error << "\nNx2ContingencyTable::ctc_by_row(int) const -> " + << "range check error\n\n"; exit ( 1 ); diff --git a/src/libcode/vx_statistics/met_stats.cc b/src/libcode/vx_statistics/met_stats.cc index 4c679aed83..0ab9a05188 100644 --- a/src/libcode/vx_statistics/met_stats.cc +++ b/src/libcode/vx_statistics/met_stats.cc @@ -425,41 +425,208 @@ void CTSInfo::compute_ci() { //////////////////////////////////////////////////////////////////////// -double CTSInfo::get_stat(const char *stat_name) { +void CTSInfo::set_stat_ctc(const string &stat_name, double v) { + + if(stat_name == "FY_OY") cts.set_fy_oy(nint(v)); + else if(stat_name == "FY_ON") cts.set_fy_on(nint(v)); + else if(stat_name == "FN_OY") 
cts.set_fn_oy(nint(v)); + else if(stat_name == "FN_ON") cts.set_fn_on(nint(v)); + else if(stat_name == "EC_VALUE") cts.set_ec_value(v); + + return; +} + +//////////////////////////////////////////////////////////////////////// + +double CTSInfo::get_stat(STATLineType lt, const string &stat_name, int i_alpha) const { double v = bad_data_double; - // Find the statistic by name - if(strcmp(stat_name, "TOTAL" ) == 0) v = cts.n(); - else if(strcmp(stat_name, "BASER" ) == 0) v = cts.baser(); - else if(strcmp(stat_name, "FMEAN" ) == 0) v = cts.fmean(); - else if(strcmp(stat_name, "ACC" ) == 0) v = cts.accuracy(); - else if(strcmp(stat_name, "FBIAS" ) == 0) v = cts.fbias(); - else if(strcmp(stat_name, "PODY" ) == 0) v = cts.pod_yes(); - else if(strcmp(stat_name, "PODN" ) == 0) v = cts.pod_no(); - else if(strcmp(stat_name, "POFD" ) == 0) v = cts.pofd(); - else if(strcmp(stat_name, "FAR" ) == 0) v = cts.far(); - else if(strcmp(stat_name, "CSI" ) == 0) v = cts.csi(); - else if(strcmp(stat_name, "GSS" ) == 0) v = cts.gss(); - else if(strcmp(stat_name, "HK" ) == 0) v = cts.hk(); - else if(strcmp(stat_name, "HSS" ) == 0) v = cts.hss(); - else if(strcmp(stat_name, "HSS_EC") == 0) v = cts.gheidke_ec(cts.ec_value()); - else if(strcmp(stat_name, "ODDS" ) == 0) v = cts.odds(); - else if(strcmp(stat_name, "LODDS" ) == 0) v = cts.lodds(); - else if(strcmp(stat_name, "ORSS" ) == 0) v = cts.orss(); - else if(strcmp(stat_name, "EDS" ) == 0) v = cts.eds(); - else if(strcmp(stat_name, "SEDS" ) == 0) v = cts.seds(); - else if(strcmp(stat_name, "EDI" ) == 0) v = cts.edi(); - else if(strcmp(stat_name, "SEDI" ) == 0) v = cts.sedi(); - else if(strcmp(stat_name, "BAGSS" ) == 0) v = cts.bagss(); + // Get statistic by line type + if(lt == STATLineType::fho) v = get_stat_fho(stat_name); + else if(lt == STATLineType::ctc) v = get_stat_ctc(stat_name); + else if(lt == STATLineType::cts) v = get_stat_cts(stat_name, i_alpha); else { mlog << Error << "\nCTSInfo::get_stat() -> " + << "unexpected line type \"" << statlinetype_to_string(lt) + << "\"!\n\n"; + exit(1); + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double CTSInfo::get_stat_fho(const string &stat_name) const { + double v = bad_data_double; + + // Find the statistic by name + if(stat_name == "TOTAL" ) v = cts.n(); + else if(stat_name == "F_RATE") v = cts.f_rate(); + else if(stat_name == "H_RATE") v = cts.h_rate(); + else if(stat_name == "O_RATE") v = cts.o_rate(); + else { + mlog << Error << "\nCTSInfo::get_stat_fho() -> " + << "unknown categorical statistic name \"" << stat_name + << "\"!\n\n"; + exit(1); + } + + // Return bad data for 0 pairs + if(cts.n() == 0 && stat_name != "TOTAL") { + v = bad_data_double; + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double CTSInfo::get_stat_ctc(const string &stat_name) const { + double v = bad_data_double; + + // Find the statistic by name + if(stat_name == "TOTAL" ) v = cts.n(); + else if(stat_name == "FY_OY" ) v = cts.fy_oy(); + else if(stat_name == "FY_ON" ) v = cts.fy_on(); + else if(stat_name == "FN_OY" ) v = cts.fn_oy(); + else if(stat_name == "FN_ON" ) v = cts.fn_on(); + else if(stat_name == "EC_VALUE") v = cts.ec_value(); + else { + mlog << Error << "\nCTSInfo::get_stat_ctc() -> " + << "unknown categorical statistic name \"" << stat_name + << "\"!\n\n"; + exit(1); + } + + // Return bad data for 0 pairs + if(cts.n() == 0 && stat_name != "TOTAL") { + v = bad_data_double; + } + + return v; +} + 
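// A standalone sketch of the contingency-table aggregation served by the
// counts-based accessors above (simplified stand-in types, not the actual
// CTSInfo/TTContingencyTable classes): counts from two runs sum element-wise,
// and the categorical statistics are recomputed from the aggregated counts,
// mirroring the -aggr behavior described in the commit log above.

#include <iostream>

struct CTC {
   int fy_oy, fy_on, fn_oy, fn_on;   // 2x2 contingency table counts

   int    total() const { return fy_oy + fy_on + fn_oy + fn_on; }
   double pody()  const { return double(fy_oy) / (fy_oy + fn_oy); }   // probability of detection
   double far()   const { return double(fy_on) / (fy_oy + fy_on); }   // false alarm ratio
};

// Aggregate two tables by summing counts.
CTC operator+(const CTC &a, const CTC &b) {
   return { a.fy_oy + b.fy_oy, a.fy_on + b.fy_on,
            a.fn_oy + b.fn_oy, a.fn_on + b.fn_on };
}

int main() {
   CTC run1 = { 40, 10, 20, 30 };
   CTC run2 = { 35, 15, 25, 25 };
   CTC sum  = run1 + run2;
   std::cout << "TOTAL=" << sum.total()
             << " PODY=" << sum.pody()
             << " FAR="  << sum.far() << "\n";   // TOTAL=200 PODY=0.625 FAR=0.25
   return 0;
}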
+//////////////////////////////////////////////////////////////////////// + +double CTSInfo::get_stat_cts(const string &stat_name, int i_alpha) const { + double v = bad_data_double; + + // Range check alpha index + if(i_alpha >= n_alpha && is_ci_stat_name(stat_name)) { + mlog << Error << "\nCTSInfo::get_stat_cts() -> " + << "alpha index out of range (" << i_alpha << " >= " + << n_alpha << ")!\n\n"; + exit(1); + } + + // Find the statistic by name + if(stat_name == "TOTAL" ) v = (double) cts.n(); + else if(stat_name == "BASER" ) v = baser.v; + else if(stat_name == "BASER_NCL" ) v = baser.v_ncl[i_alpha]; + else if(stat_name == "BASER_NCU" ) v = baser.v_ncu[i_alpha]; + else if(stat_name == "BASER_BCL" ) v = baser.v_bcl[i_alpha]; + else if(stat_name == "BASER_BCU" ) v = baser.v_bcu[i_alpha]; + else if(stat_name == "FMEAN" ) v = fmean.v; + else if(stat_name == "FMEAN_NCL" ) v = fmean.v_ncl[i_alpha]; + else if(stat_name == "FMEAN_NCU" ) v = fmean.v_ncu[i_alpha]; + else if(stat_name == "FMEAN_BCL" ) v = fmean.v_bcl[i_alpha]; + else if(stat_name == "FMEAN_BCU" ) v = fmean.v_bcu[i_alpha]; + else if(stat_name == "ACC" ) v = acc.v; + else if(stat_name == "ACC_NCL" ) v = acc.v_ncl[i_alpha]; + else if(stat_name == "ACC_NCU" ) v = acc.v_ncu[i_alpha]; + else if(stat_name == "ACC_BCL" ) v = acc.v_bcl[i_alpha]; + else if(stat_name == "ACC_BCU" ) v = acc.v_bcu[i_alpha]; + else if(stat_name == "FBIAS" ) v = fbias.v; + else if(stat_name == "FBIAS_BCL" ) v = fbias.v_bcl[i_alpha]; + else if(stat_name == "FBIAS_BCU" ) v = fbias.v_bcu[i_alpha]; + else if(stat_name == "PODY" ) v = pody.v; + else if(stat_name == "PODY_NCL" ) v = pody.v_ncl[i_alpha]; + else if(stat_name == "PODY_NCU" ) v = pody.v_ncu[i_alpha]; + else if(stat_name == "PODY_BCL" ) v = pody.v_bcl[i_alpha]; + else if(stat_name == "PODY_BCU" ) v = pody.v_bcu[i_alpha]; + else if(stat_name == "PODN" ) v = podn.v; + else if(stat_name == "PODN_NCL" ) v = podn.v_ncl[i_alpha]; + else if(stat_name == "PODN_NCU" ) v = podn.v_ncu[i_alpha]; + else if(stat_name == "PODN_BCL" ) v = podn.v_bcl[i_alpha]; + else if(stat_name == "PODN_BCU" ) v = podn.v_bcu[i_alpha]; + else if(stat_name == "POFD" ) v = pofd.v; + else if(stat_name == "POFD_NCL" ) v = pofd.v_ncl[i_alpha]; + else if(stat_name == "POFD_NCU" ) v = pofd.v_ncu[i_alpha]; + else if(stat_name == "POFD_BCL" ) v = pofd.v_bcl[i_alpha]; + else if(stat_name == "POFD_BCU" ) v = pofd.v_bcu[i_alpha]; + else if(stat_name == "FAR" ) v = far.v; + else if(stat_name == "FAR_NCL" ) v = far.v_ncl[i_alpha]; + else if(stat_name == "FAR_NCU" ) v = far.v_ncu[i_alpha]; + else if(stat_name == "FAR_BCL" ) v = far.v_bcl[i_alpha]; + else if(stat_name == "FAR_BCU" ) v = far.v_bcu[i_alpha]; + else if(stat_name == "CSI" ) v = csi.v; + else if(stat_name == "CSI_NCL" ) v = csi.v_ncl[i_alpha]; + else if(stat_name == "CSI_NCU" ) v = csi.v_ncu[i_alpha]; + else if(stat_name == "CSI_BCL" ) v = csi.v_bcl[i_alpha]; + else if(stat_name == "CSI_BCU" ) v = csi.v_bcu[i_alpha]; + else if(stat_name == "GSS" ) v = gss.v; + else if(stat_name == "GSS_BCL" ) v = gss.v_bcl[i_alpha]; + else if(stat_name == "GSS_BCU" ) v = gss.v_bcu[i_alpha]; + else if(stat_name == "HK" ) v = hk.v; + else if(stat_name == "HK_NCL" ) v = hk.v_ncl[i_alpha]; + else if(stat_name == "HK_NCU" ) v = hk.v_ncu[i_alpha]; + else if(stat_name == "HK_BCL" ) v = hk.v_bcl[i_alpha]; + else if(stat_name == "HK_BCU" ) v = hk.v_bcu[i_alpha]; + else if(stat_name == "HSS" ) v = hss.v; + else if(stat_name == "HSS_BCL" ) v = hss.v_bcl[i_alpha]; + else if(stat_name == "HSS_BCU" ) v = hss.v_bcu[i_alpha]; 
+ else if(stat_name == "ODDS" ) v = odds.v; + else if(stat_name == "ODDS_NCL" ) v = odds.v_ncl[i_alpha]; + else if(stat_name == "ODDS_NCU" ) v = odds.v_ncu[i_alpha]; + else if(stat_name == "ODDS_BCL" ) v = odds.v_bcl[i_alpha]; + else if(stat_name == "ODDS_BCU" ) v = odds.v_bcu[i_alpha]; + else if(stat_name == "LODDS" ) v = lodds.v; + else if(stat_name == "LODDS_NCL" ) v = lodds.v_ncl[i_alpha]; + else if(stat_name == "LODDS_NCU" ) v = lodds.v_ncu[i_alpha]; + else if(stat_name == "LODDS_BCL" ) v = lodds.v_bcl[i_alpha]; + else if(stat_name == "LODDS_BCU" ) v = lodds.v_bcu[i_alpha]; + else if(stat_name == "ORSS" ) v = orss.v; + else if(stat_name == "ORSS_NCL" ) v = orss.v_ncl[i_alpha]; + else if(stat_name == "ORSS_NCU" ) v = orss.v_ncu[i_alpha]; + else if(stat_name == "ORSS_BCL" ) v = orss.v_bcl[i_alpha]; + else if(stat_name == "ORSS_BCU" ) v = orss.v_bcu[i_alpha]; + else if(stat_name == "EDS" ) v = eds.v; + else if(stat_name == "EDS_NCL" ) v = eds.v_ncl[i_alpha]; + else if(stat_name == "EDS_NCU" ) v = eds.v_ncu[i_alpha]; + else if(stat_name == "EDS_BCL" ) v = eds.v_bcl[i_alpha]; + else if(stat_name == "EDS_BCU" ) v = eds.v_bcu[i_alpha]; + else if(stat_name == "SEDS" ) v = seds.v; + else if(stat_name == "SEDS_NCL" ) v = seds.v_ncl[i_alpha]; + else if(stat_name == "SEDS_NCU" ) v = seds.v_ncu[i_alpha]; + else if(stat_name == "SEDS_BCL" ) v = seds.v_bcl[i_alpha]; + else if(stat_name == "SEDS_BCU" ) v = seds.v_bcu[i_alpha]; + else if(stat_name == "EDI" ) v = edi.v; + else if(stat_name == "EDI_NCL" ) v = edi.v_ncl[i_alpha]; + else if(stat_name == "EDI_NCU" ) v = edi.v_ncu[i_alpha]; + else if(stat_name == "EDI_BCL" ) v = edi.v_bcl[i_alpha]; + else if(stat_name == "EDI_BCU" ) v = edi.v_bcu[i_alpha]; + else if(stat_name == "SEDI" ) v = sedi.v; + else if(stat_name == "SEDI_NCL" ) v = sedi.v_ncl[i_alpha]; + else if(stat_name == "SEDI_NCU" ) v = sedi.v_ncu[i_alpha]; + else if(stat_name == "SEDI_BCL" ) v = sedi.v_bcl[i_alpha]; + else if(stat_name == "SEDI_BCU" ) v = sedi.v_bcu[i_alpha]; + else if(stat_name == "BAGSS" ) v = bagss.v; + else if(stat_name == "BAGSS_BCL" ) v = bagss.v_bcl[i_alpha]; + else if(stat_name == "BAGSS_BCU" ) v = bagss.v_bcu[i_alpha]; + else if(stat_name == "HSS_EC" ) v = hss_ec.v; + else if(stat_name == "HSS_EC_BCL") v = hss_ec.v_bcl[i_alpha]; + else if(stat_name == "HSS_EC_BCU") v = hss_ec.v_bcu[i_alpha]; + else if(stat_name == "EC_VALUE" ) v = cts.ec_value(); + else { + mlog << Error << "\nCTSInfo::get_stat_cts() -> " << "unknown categorical statistic name \"" << stat_name << "\"!\n\n"; exit(1); } // Return bad data for 0 pairs - if(cts.n() == 0 && strcmp(stat_name, "TOTAL") != 0) { + if(cts.n() == 0 && stat_name != "TOTAL") { v = bad_data_double; } @@ -653,6 +820,122 @@ void MCTSInfo::compute_ci() { return; } +//////////////////////////////////////////////////////////////////////// + +double MCTSInfo::get_stat(STATLineType lt, + const string &stat_name, + ConcatString &col_name, + int i_alpha) const { + double v = bad_data_double; + + // Initialize + col_name = stat_name; + + // Get statistic by line type + if(lt == STATLineType::mctc) v = get_stat_mctc(stat_name, col_name); + else if(lt == STATLineType::mcts) v = get_stat_mcts(stat_name, i_alpha); + else { + mlog << Error << "\nMCTSInfo::get_stat() -> " + << "unexpected line type \"" << statlinetype_to_string(lt) + << "\"!\n\n"; + exit(1); + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double MCTSInfo::get_stat_mctc(const string &stat_name, + ConcatString &col_name) const { + 
double v = bad_data_double; + col_name = stat_name; + + // Find the statistic by name + if(stat_name == "TOTAL" ) v = (double) cts.total(); + else if(stat_name == "N_CAT" ) v = (double) cts.nrows(); + else if(stat_name == "EC_VALUE") v = cts.ec_value(); + else if(check_reg_exp("F[0-9]*_O[0-9]*", stat_name.c_str())) { + + col_name = "FI_OJ"; + + // Parse column name to retrieve index values + ConcatString cs(stat_name); + StringArray sa = cs.split("_"); + int i = atoi(sa[0].c_str()+1) - 1; + int j = atoi(sa[1].c_str()+1) - 1; + + // Range check + if(i < 0 || i >= cts.nrows() || + j < 0 || j >= cts.ncols()) { + mlog << Error << "\nget_stat_mctc() -> " + << "range check error for column name requested \"" << stat_name + << "\"\n\n"; + exit(1); + } + + // Retrieve the value + v = (double) cts.entry(i, j); + } + else { + mlog << Error << "\nMCTSInfo::get_stat_mctc() -> " + << "unknown multi-category statistic name \"" << stat_name + << "\"!\n\n"; + exit(1); + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double MCTSInfo::get_stat_mcts(const string &stat_name, int i_alpha) const { + double v = bad_data_double; + + // Range check alpha index + if(i_alpha >= n_alpha && is_ci_stat_name(stat_name)) { + mlog << Error << "\nMCTSInfo::get_stat_mcts() -> " + << "alpha index out of range (" << i_alpha << " >= " + << n_alpha << ")!\n\n"; + exit(1); + } + + // Find the statistic by name + if(stat_name == "TOTAL" ) v = (double) cts.total(); + else if(stat_name == "N_CAT" ) v = (double) cts.nrows(); + else if(stat_name == "ACC" ) v = acc.v; + else if(stat_name == "ACC_NCL" ) v = acc.v_ncl[i_alpha]; + else if(stat_name == "ACC_NCU" ) v = acc.v_ncu[i_alpha]; + else if(stat_name == "ACC_BCL" ) v = acc.v_bcl[i_alpha]; + else if(stat_name == "ACC_BCU" ) v = acc.v_bcu[i_alpha]; + else if(stat_name == "HK" ) v = hk.v; + else if(stat_name == "HK_BCL" ) v = hk.v_bcl[i_alpha]; + else if(stat_name == "HK_BCU" ) v = hk.v_bcu[i_alpha]; + else if(stat_name == "HSS" ) v = hss.v; + else if(stat_name == "HSS_BCL" ) v = hss.v_bcl[i_alpha]; + else if(stat_name == "HSS_BCU" ) v = hss.v_bcu[i_alpha]; + else if(stat_name == "GER" ) v = ger.v; + else if(stat_name == "GER_BCL" ) v = ger.v_bcl[i_alpha]; + else if(stat_name == "GER_BCU" ) v = ger.v_bcu[i_alpha]; + else if(stat_name == "HSS_EC" ) v = hss_ec.v; + else if(stat_name == "HSS_EC_BCL") v = hss_ec.v_bcl[i_alpha]; + else if(stat_name == "HSS_EC_BCU") v = hss_ec.v_bcu[i_alpha]; + else if(stat_name == "EC_VALUE" ) v = cts.ec_value(); + else { + mlog << Error << "\nMCTSInfo::get_stat_mcts() -> " + << "unknown multi-category statistic name \"" << stat_name + << "\"!\n\n"; + exit(1); + } + + // Return bad data for 0 pairs + if(cts.total() == 0 && stat_name != "TOTAL") { + v = bad_data_double; + } + + return v; +} + //////////////////////////////////////////////////////////////////////// // // Code for class CNTInfo @@ -702,6 +985,44 @@ void CNTInfo::init_from_scratch() { //////////////////////////////////////////////////////////////////////// +void CNTInfo::zero_out() { + + fbar.set_bad_data(); + fstdev.set_bad_data(); + obar.set_bad_data(); + ostdev.set_bad_data(); + pr_corr.set_bad_data(); + sp_corr.set_bad_data(); + kt_corr.set_bad_data(); + anom_corr.set_bad_data(); + rmsfa.set_bad_data(); + rmsoa.set_bad_data(); + anom_corr_uncntr.set_bad_data(); + me.set_bad_data(); + me2.set_bad_data(); + estdev.set_bad_data(); + mbias.set_bad_data(); + mae.set_bad_data(); + mse.set_bad_data(); + msess.set_bad_data(); + 
bcmse.set_bad_data(); + rmse.set_bad_data(); + si.set_bad_data(); + e10.set_bad_data(); + e25.set_bad_data(); + e50.set_bad_data(); + e75.set_bad_data(); + e90.set_bad_data(); + eiqr.set_bad_data(); + mad.set_bad_data(); + + n_ranks = frank_ties = orank_ties = 0; + + return; +} + +//////////////////////////////////////////////////////////////////////// + void CNTInfo::clear() { n = 0; @@ -1023,42 +1344,121 @@ void CNTInfo::compute_ci() { //////////////////////////////////////////////////////////////////////// -double CNTInfo::get_stat(const char *stat_name) { +double CNTInfo::get_stat(const string &stat_name, int i_alpha) const { double v = bad_data_double; + // Range check alpha index + if(i_alpha >= n_alpha && is_ci_stat_name(stat_name)) { + mlog << Error << "\nCNTInfo::get_stat() -> " + << "alpha index out of range (" << i_alpha << " >= " + << n_alpha << ")!\n\n"; + exit(1); + } + // Find the statistic by name - if(strcmp(stat_name, "TOTAL" ) == 0) v = n; - else if(strcmp(stat_name, "FBAR" ) == 0) v = fbar.v; - else if(strcmp(stat_name, "FSTDEV" ) == 0) v = fstdev.v; - else if(strcmp(stat_name, "OBAR" ) == 0) v = obar.v; - else if(strcmp(stat_name, "OSTDEV" ) == 0) v = ostdev.v; - else if(strcmp(stat_name, "PR_CORR" ) == 0) v = pr_corr.v; - else if(strcmp(stat_name, "SP_CORR" ) == 0) v = sp_corr.v; - else if(strcmp(stat_name, "KT_CORR" ) == 0) v = kt_corr.v; - else if(strcmp(stat_name, "RANKS" ) == 0) v = n_ranks; - else if(strcmp(stat_name, "FRANK_TIES" ) == 0) v = frank_ties; - else if(strcmp(stat_name, "ORANK_TIES" ) == 0) v = orank_ties; - else if(strcmp(stat_name, "ME" ) == 0) v = me.v; - else if(strcmp(stat_name, "ESTDEV" ) == 0) v = estdev.v; - else if(strcmp(stat_name, "MBIAS" ) == 0) v = mbias.v; - else if(strcmp(stat_name, "MAE" ) == 0) v = mae.v; - else if(strcmp(stat_name, "MSE" ) == 0) v = mse.v; - else if(strcmp(stat_name, "BCMSE" ) == 0) v = bcmse.v; - else if(strcmp(stat_name, "RMSE" ) == 0) v = rmse.v; - else if(strcmp(stat_name, "SI" ) == 0) v = si.v; - else if(strcmp(stat_name, "E10" ) == 0) v = e10.v; - else if(strcmp(stat_name, "E25" ) == 0) v = e25.v; - else if(strcmp(stat_name, "E50" ) == 0) v = e50.v; - else if(strcmp(stat_name, "E75" ) == 0) v = e75.v; - else if(strcmp(stat_name, "E90" ) == 0) v = e90.v; - else if(strcmp(stat_name, "EIQR" ) == 0) v = eiqr.v; - else if(strcmp(stat_name, "MAD " ) == 0) v = mad.v; - else if(strcmp(stat_name, "ANOM_CORR" ) == 0) v = anom_corr.v; - else if(strcmp(stat_name, "ME2" ) == 0) v = me2.v; - else if(strcmp(stat_name, "MSESS" ) == 0) v = msess.v; - else if(strcmp(stat_name, "RMSFA" ) == 0) v = rmsfa.v; - else if(strcmp(stat_name, "RMSOA" ) == 0) v = rmsoa.v; - else if(strcmp(stat_name, "ANOM_CORR_UNCNTR") == 0) v = anom_corr_uncntr.v; + if(stat_name == "TOTAL" ) v = (double) n; + else if(stat_name == "FBAR" ) v = fbar.v; + else if(stat_name == "FBAR_NCL" ) v = fbar.v_ncl[i_alpha]; + else if(stat_name == "FBAR_NCU" ) v = fbar.v_ncu[i_alpha]; + else if(stat_name == "FBAR_BCL" ) v = fbar.v_bcl[i_alpha]; + else if(stat_name == "FBAR_BCU" ) v = fbar.v_bcu[i_alpha]; + else if(stat_name == "FSTDEV" ) v = fstdev.v; + else if(stat_name == "FSTDEV_NCL" ) v = fstdev.v_ncl[i_alpha]; + else if(stat_name == "FSTDEV_NCU" ) v = fstdev.v_ncu[i_alpha]; + else if(stat_name == "FSTDEV_BCL" ) v = fstdev.v_bcl[i_alpha]; + else if(stat_name == "FSTDEV_BCU" ) v = fstdev.v_bcu[i_alpha]; + else if(stat_name == "OBAR" ) v = obar.v; + else if(stat_name == "OBAR_NCL" ) v = obar.v_ncl[i_alpha]; + else if(stat_name == "OBAR_NCU" ) v = obar.v_ncu[i_alpha]; 
+ else if(stat_name == "OBAR_BCL" ) v = obar.v_bcl[i_alpha]; + else if(stat_name == "OBAR_BCU" ) v = obar.v_bcu[i_alpha]; + else if(stat_name == "OSTDEV" ) v = ostdev.v; + else if(stat_name == "OSTDEV_NCL" ) v = ostdev.v_ncl[i_alpha]; + else if(stat_name == "OSTDEV_NCU" ) v = ostdev.v_ncu[i_alpha]; + else if(stat_name == "OSTDEV_BCL" ) v = ostdev.v_bcl[i_alpha]; + else if(stat_name == "OSTDEV_BCU" ) v = ostdev.v_bcu[i_alpha]; + else if(stat_name == "PR_CORR" ) v = pr_corr.v; + else if(stat_name == "PR_CORR_NCL" ) v = pr_corr.v_ncl[i_alpha]; + else if(stat_name == "PR_CORR_NCU" ) v = pr_corr.v_ncu[i_alpha]; + else if(stat_name == "PR_CORR_BCL" ) v = pr_corr.v_bcl[i_alpha]; + else if(stat_name == "PR_CORR_BCU" ) v = pr_corr.v_bcu[i_alpha]; + else if(stat_name == "SP_CORR" ) v = sp_corr.v; + else if(stat_name == "KT_CORR" ) v = kt_corr.v; + else if(stat_name == "RANKS" ) v = n_ranks; + else if(stat_name == "FRANK_TIES" ) v = frank_ties; + else if(stat_name == "ORANK_TIES" ) v = orank_ties; + else if(stat_name == "ME" ) v = me.v; + else if(stat_name == "ME_NCL" ) v = me.v_ncl[i_alpha]; + else if(stat_name == "ME_NCU" ) v = me.v_ncu[i_alpha]; + else if(stat_name == "ME_BCL" ) v = me.v_bcl[i_alpha]; + else if(stat_name == "ME_BCU" ) v = me.v_bcu[i_alpha]; + else if(stat_name == "ESTDEV" ) v = estdev.v; + else if(stat_name == "ESTDEV_NCL" ) v = estdev.v_ncl[i_alpha]; + else if(stat_name == "ESTDEV_NCU" ) v = estdev.v_ncu[i_alpha]; + else if(stat_name == "ESTDEV_BCL" ) v = estdev.v_bcl[i_alpha]; + else if(stat_name == "ESTDEV_BCU" ) v = estdev.v_bcu[i_alpha]; + else if(stat_name == "MBIAS" ) v = mbias.v; + else if(stat_name == "MBIAS_BCL" ) v = mbias.v_bcl[i_alpha]; + else if(stat_name == "MBIAS_BCU" ) v = mbias.v_bcu[i_alpha]; + else if(stat_name == "MAE" ) v = mae.v; + else if(stat_name == "MAE_BCL" ) v = mae.v_bcl[i_alpha]; + else if(stat_name == "MAE_BCU" ) v = mae.v_bcu[i_alpha]; + else if(stat_name == "MSE" ) v = mse.v; + else if(stat_name == "MSE_BCL" ) v = mse.v_bcl[i_alpha]; + else if(stat_name == "MSE_BCU" ) v = mse.v_bcu[i_alpha]; + else if(stat_name == "BCMSE" ) v = bcmse.v; + else if(stat_name == "BCMSE_BCL" ) v = bcmse.v_bcl[i_alpha]; + else if(stat_name == "BCMSE_BCU" ) v = bcmse.v_bcu[i_alpha]; + else if(stat_name == "RMSE" ) v = rmse.v; + else if(stat_name == "RMSE_BCL" ) v = rmse.v_bcl[i_alpha]; + else if(stat_name == "RMSE_BCU" ) v = rmse.v_bcu[i_alpha]; + else if(stat_name == "SI" ) v = si.v; + else if(stat_name == "SI_BCL" ) v = si.v_bcl[i_alpha]; + else if(stat_name == "SI_BCU" ) v = si.v_bcu[i_alpha]; + else if(stat_name == "E10" ) v = e10.v; + else if(stat_name == "E10_BCL" ) v = e10.v_bcl[i_alpha]; + else if(stat_name == "E10_BCU" ) v = e10.v_bcu[i_alpha]; + else if(stat_name == "E25" ) v = e25.v; + else if(stat_name == "E25_BCL" ) v = e25.v_bcl[i_alpha]; + else if(stat_name == "E25_BCU" ) v = e25.v_bcu[i_alpha]; + else if(stat_name == "E50" ) v = e50.v; + else if(stat_name == "E50_BCL" ) v = e50.v_bcl[i_alpha]; + else if(stat_name == "E50_BCU" ) v = e50.v_bcu[i_alpha]; + else if(stat_name == "E75" ) v = e75.v; + else if(stat_name == "E75_BCL" ) v = e75.v_bcl[i_alpha]; + else if(stat_name == "E75_BCU" ) v = e75.v_bcu[i_alpha]; + else if(stat_name == "E90" ) v = e90.v; + else if(stat_name == "E90_BCL" ) v = e90.v_bcl[i_alpha]; + else if(stat_name == "E90_BCU" ) v = e90.v_bcu[i_alpha]; + else if(stat_name == "EIQR" ) v = eiqr.v; + else if(stat_name == "EIQR_BCL" ) v = eiqr.v_bcl[i_alpha]; + else if(stat_name == "EIQR_BCU" ) v = eiqr.v_bcu[i_alpha]; + else if(stat_name == 
"MAD" ) v = mad.v; + else if(stat_name == "MAD_BCL" ) v = mad.v_bcl[i_alpha]; + else if(stat_name == "MAD_BCU" ) v = mad.v_bcu[i_alpha]; + else if(stat_name == "ANOM_CORR" ) v = anom_corr.v; + else if(stat_name == "ANOM_CORR_NCL" ) v = anom_corr.v_ncl[i_alpha]; + else if(stat_name == "ANOM_CORR_NCU" ) v = anom_corr.v_ncu[i_alpha]; + else if(stat_name == "ANOM_CORR_BCL" ) v = anom_corr.v_bcl[i_alpha]; + else if(stat_name == "ANOM_CORR_BCU" ) v = anom_corr.v_bcu[i_alpha]; + else if(stat_name == "ME2" ) v = me2.v; + else if(stat_name == "ME2_BCL" ) v = me2.v_bcl[i_alpha]; + else if(stat_name == "ME2_BCU" ) v = me2.v_bcu[i_alpha]; + else if(stat_name == "MSESS" ) v = msess.v; + else if(stat_name == "MSESS_BCL" ) v = msess.v_bcl[i_alpha]; + else if(stat_name == "MSESS_BCU" ) v = msess.v_bcu[i_alpha]; + else if(stat_name == "RMSFA" ) v = rmsfa.v; + else if(stat_name == "RMSFA_BCL" ) v = rmsfa.v_bcl[i_alpha]; + else if(stat_name == "RMSFA_BCU" ) v = rmsfa.v_bcu[i_alpha]; + else if(stat_name == "RMSOA" ) v = rmsoa.v; + else if(stat_name == "RMSOA_BCL" ) v = rmsoa.v_bcl[i_alpha]; + else if(stat_name == "RMSOA_BCU" ) v = rmsoa.v_bcu[i_alpha]; + else if(stat_name == "ANOM_CORR_UNCNTR" ) v = anom_corr_uncntr.v; + else if(stat_name == "ANOM_CORR_UNCNTR_BCL") v = anom_corr_uncntr.v_bcl[i_alpha]; + else if(stat_name == "ANOM_CORR_UNCNTR_BCU") v = anom_corr_uncntr.v_bcu[i_alpha]; + else if(stat_name == "SI" ) v = si.v; + else if(stat_name == "SI_BCL" ) v = si.v_bcl[i_alpha]; + else if(stat_name == "SI_BCU" ) v = si.v_bcu[i_alpha]; else { mlog << Error << "\nCNTInfo::get_stat() -> " << "unknown continuous statistic name \"" << stat_name @@ -1067,7 +1467,7 @@ double CNTInfo::get_stat(const char *stat_name) { } // Return bad data for 0 pairs - if(n == 0 && strcmp(stat_name, "TOTAL") != 0) { + if(n == 0 && stat_name != "TOTAL") { v = bad_data_double; } @@ -1113,7 +1513,8 @@ SL1L2Info & SL1L2Info::operator=(const SL1L2Info &c) { //////////////////////////////////////////////////////////////////////// SL1L2Info & SL1L2Info::operator+=(const SL1L2Info &c) { - SL1L2Info s_info; + SL1L2Info s_info = *this; + s_info.zero_out(); s_info.scount = scount + c.scount; @@ -1305,6 +1706,110 @@ void SL1L2Info::set(const PairDataPoint &pd_all) { return; } +//////////////////////////////////////////////////////////////////////// + +void SL1L2Info::set_stat_sl1l2(const string &stat_name, double v) { + + if(stat_name == "TOTAL") scount = nint(v); + else if(stat_name == "FBAR" ) fbar = v; + else if(stat_name == "OBAR" ) obar = v; + else if(stat_name == "FOBAR") fobar = v; + else if(stat_name == "FFBAR") ffbar = v; + else if(stat_name == "OOBAR") oobar = v; + else if(stat_name == "MAE" ) smae = v; + else { + mlog << Error << "\nSL1L2Info::set_stat_sl1l2() -> " + << "unknown scalar partial sum statistic name \"" << stat_name + << "\"!\n\n"; + exit(1); + } + + return; +} + +//////////////////////////////////////////////////////////////////////// + +void SL1L2Info::set_stat_sal1l2(const string &stat_name, double v) { + + if(stat_name == "TOTAL" ) sacount = nint(v); + else if(stat_name == "FABAR" ) fabar = v; + else if(stat_name == "OABAR" ) oabar = v; + else if(stat_name == "FOABAR") foabar = v; + else if(stat_name == "FFABAR") ffabar = v; + else if(stat_name == "OOABAR") ooabar = v; + else if(stat_name == "MAE" ) samae = v; + else { + mlog << Error << "\nSL1L2Info::set_stat_sal1l2() -> " + << "unknown scalar anomaly partial sum statistic name \"" << stat_name + << "\"!\n\n"; + exit(1); + } + + return; +} + 
+//////////////////////////////////////////////////////////////////////// + +double SL1L2Info::get_stat(STATLineType lt, const string &stat_name) const { + double v = bad_data_double; + + // Get statistic by line type + if(lt == STATLineType::sl1l2) v = get_stat_sl1l2(stat_name); + else if(lt == STATLineType::sal1l2) v = get_stat_sal1l2(stat_name); + else { + mlog << Error << "\nSL1L2Info::get_stat() -> " + << "unexpected line type \"" << statlinetype_to_string(lt) + << "\"!\n\n"; + exit(1); + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double SL1L2Info::get_stat_sl1l2(const string &stat_name) const { + double v = bad_data_double; + + if(stat_name == "TOTAL") v = (double) scount; + else if(stat_name == "FBAR" ) v = fbar; + else if(stat_name == "OBAR" ) v = obar; + else if(stat_name == "FOBAR") v = fobar; + else if(stat_name == "FFBAR") v = ffbar; + else if(stat_name == "OOBAR") v = oobar; + else if(stat_name == "MAE" ) v = smae; + else { + mlog << Error << "\nSL1L2Info::get_stat_sl1l2() -> " + << "unknown scalar partial sum statistic name \"" << stat_name + << "\"!\n\n"; + exit(1); + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double SL1L2Info::get_stat_sal1l2(const string &stat_name) const { + double v = bad_data_double; + + if(stat_name == "TOTAL" ) v = (double) sacount; + else if(stat_name == "FABAR" ) v = fabar; + else if(stat_name == "OABAR" ) v = oabar; + else if(stat_name == "FOABAR") v = foabar; + else if(stat_name == "FFABAR") v = ffabar; + else if(stat_name == "OOABAR") v = ooabar; + else if(stat_name == "MAE" ) v = samae; + else { + mlog << Error << "\nSL1L2Info::get_stat_sal1l2() -> " + << "unknown scalar anomaly partial sum statistic name \"" << stat_name + << "\"!\n\n"; + exit(1); + } + + return v; +} + //////////////////////////////////////////////////////////////////////// // // Code for class VL1L2Info @@ -1344,11 +1849,8 @@ VL1L2Info & VL1L2Info::operator=(const VL1L2Info &c) { //////////////////////////////////////////////////////////////////////// VL1L2Info & VL1L2Info::operator+=(const VL1L2Info &c) { - VL1L2Info v_info; - - // Store alpha values - v_info.allocate_n_alpha(n_alpha); - for(int i=0; i " + << "unknown vector partial sums statistic name \"" << stat_name + << "\"!\n\n"; + exit(1); + } + + return v; +} //////////////////////////////////////////////////////////////////////// -double VL1L2Info::get_stat(const char *stat_name) { +double VL1L2Info::get_stat_val1l2(const string &stat_name) const { double v = bad_data_double; - if(strcmp(stat_name, "TOTAL" ) == 0) v = vcount; - else if(strcmp(stat_name, "FBAR" ) == 0) v = FBAR.v; - else if(strcmp(stat_name, "OBAR" ) == 0) v = OBAR.v; - else if(strcmp(stat_name, "FS_RMS" ) == 0) v = FS_RMS.v; - else if(strcmp(stat_name, "OS_RMS" ) == 0) v = OS_RMS.v; - else if(strcmp(stat_name, "MSVE" ) == 0) v = MSVE.v; - else if(strcmp(stat_name, "RMSVE" ) == 0) v = RMSVE.v; - else if(strcmp(stat_name, "FSTDEV" ) == 0) v = FSTDEV.v; - else if(strcmp(stat_name, "OSTDEV" ) == 0) v = OSTDEV.v; - else if(strcmp(stat_name, "FDIR" ) == 0) v = FDIR.v; - else if(strcmp(stat_name, "ODIR" ) == 0) v = ODIR.v; - else if(strcmp(stat_name, "FBAR_SPEED" ) == 0) v = FBAR_SPEED.v; - else if(strcmp(stat_name, "OBAR_SPEED" ) == 0) v = OBAR_SPEED.v; - else if(strcmp(stat_name, "VDIFF_SPEED" ) == 0) v = VDIFF_SPEED.v; - else if(strcmp(stat_name, "VDIFF_DIR" ) == 0) v = VDIFF_DIR.v; - else if(strcmp(stat_name, "SPEED_ERR" ) == 0) v = SPEED_ERR.v; 
- else if(strcmp(stat_name, "SPEED_ABSERR" ) == 0) v = SPEED_ABSERR.v; - else if(strcmp(stat_name, "DIR_ERR" ) == 0) v = DIR_ERR.v; - else if(strcmp(stat_name, "DIR_ABSERR" ) == 0) v = DIR_ABSERR.v; - else if(strcmp(stat_name, "ANOM_CORR" ) == 0) v = ANOM_CORR.v; - else if(strcmp(stat_name, "ANOM_CORR_UNCNTR") == 0) v = ANOM_CORR_UNCNTR.v; - else if(strcmp(stat_name, "DIR_ME" ) == 0) v = DIR_ME.v; - else if(strcmp(stat_name, "DIR_MAE" ) == 0) v = DIR_MAE.v; - else if(strcmp(stat_name, "DIR_MSE" ) == 0) v = DIR_MSE.v; - else if(strcmp(stat_name, "DIR_RMSE" ) == 0) v = DIR_RMSE.v; + // Find the statistic by name + if(stat_name == "TOTAL" ) v = vacount; + else if(stat_name == "UFABAR" ) v = ufa_bar; + else if(stat_name == "VFABAR" ) v = vfa_bar; + else if(stat_name == "UOABAR" ) v = uoa_bar; + else if(stat_name == "VOABAR" ) v = voa_bar; + else if(stat_name == "UVFOABAR" ) v = uvfoa_bar; + else if(stat_name == "UVFFABAR" ) v = uvffa_bar; + else if(stat_name == "UVOOABAR" ) v = uvooa_bar; + else if(stat_name == "FA_SPEED_BAR") v = fa_speed_bar; + else if(stat_name == "OA_SPEED_BAR") v = oa_speed_bar; + else if(stat_name == "TOTAL_DIR" ) v = dacount; + else if(stat_name == "DIRA_ME" ) v = dira_bar; + else if(stat_name == "DIRA_MAE" ) v = absdira_bar; + else if(stat_name == "DIRA_MSE" ) v = dira2_bar; else { mlog << Error << "\nVL1L2Info::get_stat() -> " - << "unknown continuous statistic name \"" << stat_name + << "unknown vector anomaly partial sums statistic name \"" << stat_name + << "\"!\n\n"; + exit(1); + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double VL1L2Info::get_stat_vcnt(const string &stat_name) const { + double v = bad_data_double; + + if(stat_name == "TOTAL" ) v = vcount; + else if(stat_name == "FBAR" ) v = FBAR.v; + else if(stat_name == "OBAR" ) v = OBAR.v; + else if(stat_name == "FS_RMS" ) v = FS_RMS.v; + else if(stat_name == "OS_RMS" ) v = OS_RMS.v; + else if(stat_name == "MSVE" ) v = MSVE.v; + else if(stat_name == "RMSVE" ) v = RMSVE.v; + else if(stat_name == "FSTDEV" ) v = FSTDEV.v; + else if(stat_name == "OSTDEV" ) v = OSTDEV.v; + else if(stat_name == "FDIR" ) v = FDIR.v; + else if(stat_name == "ODIR" ) v = ODIR.v; + else if(stat_name == "FBAR_SPEED" ) v = FBAR_SPEED.v; + else if(stat_name == "OBAR_SPEED" ) v = OBAR_SPEED.v; + else if(stat_name == "VDIFF_SPEED" ) v = VDIFF_SPEED.v; + else if(stat_name == "VDIFF_DIR" ) v = VDIFF_DIR.v; + else if(stat_name == "SPEED_ERR" ) v = SPEED_ERR.v; + else if(stat_name == "SPEED_ABSERR" ) v = SPEED_ABSERR.v; + else if(stat_name == "DIR_ERR" ) v = DIR_ERR.v; + else if(stat_name == "DIR_ABSERR" ) v = DIR_ABSERR.v; + else if(stat_name == "ANOM_CORR" ) v = ANOM_CORR.v; + else if(stat_name == "ANOM_CORR_UNCNTR") v = ANOM_CORR_UNCNTR.v; + else if(stat_name == "DIR_ME" ) v = DIR_ME.v; + else if(stat_name == "DIR_MAE" ) v = DIR_MAE.v; + else if(stat_name == "DIR_MSE" ) v = DIR_MSE.v; + else if(stat_name == "DIR_RMSE" ) v = DIR_RMSE.v; + else { + mlog << Error << "\nVL1L2Info::get_stat_vcnt() -> " + << "unknown vector continuous statistic name \"" << stat_name << "\"!\n\n"; exit(1); } @@ -2128,7 +2689,8 @@ NBRCNTInfo & NBRCNTInfo::operator=(const NBRCNTInfo &c) { //////////////////////////////////////////////////////////////////////// NBRCNTInfo & NBRCNTInfo::operator+=(const NBRCNTInfo &c) { - NBRCNTInfo n_info; + NBRCNTInfo n_info = *this; + n_info.sl1l2_info.zero_out(); double den; n_info.sl1l2_info.scount = sl1l2_info.scount + c.sl1l2_info.scount; @@ -2756,6 +3318,285 @@ void 
PCTInfo::compute_ci() { return; } +//////////////////////////////////////////////////////////////////////// + +double PCTInfo::get_stat(STATLineType lt, + const string &stat_name, + ConcatString &col_name, + int i_alpha) const { + double v = bad_data_double; + + // Get statistic by line type + if(lt == STATLineType::pct) v = get_stat_pct(stat_name, col_name); + else if(lt == STATLineType::pjc) v = get_stat_pjc(stat_name, col_name); + else if(lt == STATLineType::prc) v = get_stat_prc(stat_name, col_name); + else if(lt == STATLineType::pstd) v = get_stat_pstd(stat_name, col_name, i_alpha); + else { + mlog << Error << "\nPCTInfo::get_stat() -> " + << "unexpected line type \"" << statlinetype_to_string(lt) + << "\"!\n\n"; + exit(1); + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double PCTInfo::get_stat_pct(const string &stat_name, + ConcatString &col_name) const { + int i = 0; + double v = bad_data_double; + col_name = stat_name; + + // Get index value for variable column numbers + if(check_reg_exp("_[0-9]", stat_name.c_str())) { + + // Parse the index value from the column name + i = atoi(strrchr(stat_name.c_str(), '_') + 1) - 1; + + // Range check (allow THRESH_N for N == nrows) + if(i < 0 || i > pct.nrows()) { + mlog << Error << "\nPCTInfo::get_stat_pct() -> " + << "range check error for column name requested \"" << stat_name + << "\"\n\n"; + exit(1); + } + } // end if + + // Find the statistic by name + if(stat_name == "TOTAL") { + v = (double) pct.n(); + } + else if(stat_name == "N_THRESH") { + v = (double) pct.nrows() + 1; + } + else if(check_reg_exp("THRESH_[0-9]", stat_name.c_str())) { + v = pct.threshold(i); + col_name = "THRESH_I"; + } + else if(check_reg_exp("OY_[0-9]", stat_name.c_str())){ + v = (double) pct.event_count_by_row(i); + col_name = "OY_I"; + } + else if(check_reg_exp("ON_[0-9]", stat_name.c_str())) { + v = (double) pct.nonevent_count_by_row(i); + col_name = "ON_I"; + } + else { + mlog << Error << "\nPCTInfo::get_stat_pct() -> " + << "unsupported column name requested \"" << stat_name + << "\"\n\n"; + exit(1); + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double PCTInfo::get_stat_pjc(const string &stat_name, + ConcatString &col_name) const { + int i = 0; + double v = bad_data_double; + col_name = stat_name; + + // Get index value for variable column numbers + if(check_reg_exp("_[0-9]", stat_name.c_str())) { + + // Parse the index value from the column name + i = atoi(strrchr(stat_name.c_str(), '_') + 1) - 1; + + // Range check + if(i < 0 || i >= pct.nrows()) { + mlog << Error << "\nPCTInfo::get_stat_pjc() -> " + << "range check error for column name requested \"" << stat_name + << "\"\n\n"; + exit(1); + } + } // end if + + // Find the statistic by name + if(stat_name == "TOTAL") { + v = (double) pct.n(); + } + else if(stat_name == "N_THRESH") { + v = (double) pct.nrows() + 1; + } + else if(check_reg_exp("THRESH_[0-9]", stat_name.c_str())) { + v = pct.threshold(i); + col_name = "THRESH_I"; + } + else if(check_reg_exp("OY_TP_[0-9]", stat_name.c_str())) { + v = pct.event_count_by_row(i)/(double) pct.n(); + col_name = "OY_TP_I"; + } + else if(check_reg_exp("ON_TP_[0-9]", stat_name.c_str())) { + v = pct.nonevent_count_by_row(i)/(double) pct.n(); + col_name = "ON_TP_I"; + } + else if(check_reg_exp("CALIBRATION_[0-9]", stat_name.c_str())) { + v = pct.row_calibration(i); + col_name = "CALIBRATION_I"; + } + else if(check_reg_exp("REFINEMENT_[0-9]", stat_name.c_str())) { + v 
= pct.row_refinement(i); + col_name = "REFINEMENT_I"; + } + else if(check_reg_exp("LIKELIHOOD_[0-9]", stat_name.c_str())) { + v = pct.row_event_likelihood(i); + col_name = "LIKELIHOOD_I"; + } + else if(check_reg_exp("BASER_[0-9]", stat_name.c_str())) { + v = pct.row_obar(i); + col_name = "BASER_I"; + } + else { + mlog << Error << "\nPCTInfo::get_stat_pjc() -> " + << "unsupported column name requested \"" << stat_name + << "\"\n\n"; + exit(1); + } + + // Return bad data for 0 pairs + if(pct.n() == 0 && stat_name != "TOTAL") { + v = bad_data_double; + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double PCTInfo::get_stat_prc(const string &stat_name, + ConcatString &col_name) const { + int i = 0; + double v = bad_data_double; + col_name = stat_name; + TTContingencyTable ct; + + // Get index value for variable column numbers + if(check_reg_exp("_[0-9]", stat_name.c_str())) { + + // Parse the index value from the column name + i = atoi(strrchr(stat_name.c_str(), '_') + 1) - 1; + + // Range check + if(i < 0 || i >= pct.nrows()) { + mlog << Error << "\nPCTInfo::get_stat_prc() -> " + << "range check error for column name requested \"" << stat_name + << "\"\n\n"; + exit(1); + } + + // Get the 2x2 contingency table for this row + ct = pct.ctc_by_row(i); + + } // end if + + // Find the statistic by name + if(stat_name == "TOTAL") { + v = (double) pct.n(); + } + else if(stat_name == "N_THRESH") { + v = (double) pct.nrows() + 1; + } + else if(check_reg_exp("THRESH_[0-9]", stat_name.c_str())) { + v = pct.threshold(i); + col_name = "THRESH_I"; + } + else if(check_reg_exp("PODY_[0-9]", stat_name.c_str())) { + v = ct.pod_yes(); + col_name = "PODY_I"; + } + else if(check_reg_exp("POFD_[0-9]", stat_name.c_str())) { + v = ct.pofd(); + col_name = "POFD_I"; + } + else { + mlog << Error << "\nPCTInfo::get_stat_prc() -> " + << "unsupported column name requested \"" << stat_name + << "\"\n\n"; + exit(1); + } + + // Return bad data for 0 pairs + if(pct.n() == 0 && stat_name != "TOTAL") { + v = bad_data_double; + } + + return v; +} + +//////////////////////////////////////////////////////////////////////// + +double PCTInfo::get_stat_pstd(const string &stat_name, + ConcatString &col_name, + int i_alpha) const { + int i = 0; + double v = bad_data_double; + col_name = stat_name; + + // Range check alpha index + if(i_alpha >= n_alpha && is_ci_stat_name(stat_name)) { + mlog << Error << "\nPCTInfo::get_stat_pstd() -> " + << "alpha index out of range (" << i_alpha << " >= " + << n_alpha << ")!\n\n"; + exit(1); + } + + // Get index value for variable column numbers + if(check_reg_exp("_[0-9]", stat_name.c_str())) { + + // Parse the index value from the column name + i = atoi(strrchr(stat_name.c_str(), '_') + 1) - 1; + + // Range check + if(i < 0 || i >= pct.nrows()) { + mlog << Error << "\nPCTInfo::get_stat_pstd() -> " + << "range check error for column name requested \"" << stat_name + << "\"\n\n"; + exit(1); + } + } // end if + + // Find the statistic by name + if(stat_name == "TOTAL" ) v = (double) pct.n(); + else if(stat_name == "N_THRESH" ) v = (double) pct.nrows() + 1; + else if(stat_name == "BASER" ) v = baser.v; + else if(stat_name == "BASER_NCL" ) v = baser.v_ncl[i_alpha]; + else if(stat_name == "BASER_NCU" ) v = baser.v_ncu[i_alpha]; + else if(stat_name == "RELIABILITY") v = pct.reliability(); + else if(stat_name == "RESOLUTION" ) v = pct.resolution(); + else if(stat_name == "UNCERTAINTY") v = pct.uncertainty(); + else if(stat_name == "ROC_AUC" ) v = 
pct.roc_auc(); + else if(stat_name == "BRIER" ) v = brier.v; + else if(stat_name == "BRIER_NCL" ) v = brier.v_ncl[i_alpha]; + else if(stat_name == "BRIER_NCU" ) v = brier.v_ncu[i_alpha]; + else if(stat_name == "BRIERCL" ) v = briercl.v; + else if(stat_name == "BRIERCL_NCL") v = briercl.v_ncl[i_alpha]; + else if(stat_name == "BRIERCL_NCU") v = briercl.v_ncu[i_alpha]; + else if(stat_name == "BSS" ) v = bss; + else if(stat_name == "BSS_SMPL" ) v = bss_smpl; + else if(check_reg_exp("THRESH_[0-9]", stat_name.c_str())) { + v = pct.threshold(i); + col_name = "THRESH_I"; + } + else { + mlog << Error << "\nPCTInfo::get_stat_pstd() -> " + << "unsupported column name requested \"" << stat_name + << "\"\n\n"; + exit(1); + } + + // Return bad data for 0 pairs + if(pct.n() == 0 && stat_name != "TOTAL") { + v = bad_data_double; + } + + return v; +} + //////////////////////////////////////////////////////////////////////// // // Code for class GRADInfo @@ -3627,3 +4468,10 @@ int compute_rank(const DataPlane &dp, DataPlane &dp_rank, double *data_rank, int } //////////////////////////////////////////////////////////////////////// + +bool is_ci_stat_name(const string &stat_name) { + return (stat_name.find("_NC") != string::npos || + stat_name.find("_BC") != string::npos); +} + +//////////////////////////////////////////////////////////////////////// diff --git a/src/libcode/vx_statistics/met_stats.h b/src/libcode/vx_statistics/met_stats.h index f3bef1a90c..c968697dd7 100644 --- a/src/libcode/vx_statistics/met_stats.h +++ b/src/libcode/vx_statistics/met_stats.h @@ -97,7 +97,12 @@ class CTSInfo { void compute_stats(); void compute_ci(); - double get_stat(const char *); + void set_stat_ctc(const std::string &, double); + + double get_stat(STATLineType, const std::string &, int i_alpha=0) const; + double get_stat_fho(const std::string &) const; + double get_stat_ctc(const std::string &) const; + double get_stat_cts(const std::string &, int i_alpha=0) const; }; //////////////////////////////////////////////////////////////////////// @@ -136,6 +141,10 @@ class MCTSInfo { void add(double, double, const ClimoPntInfo *cpi = nullptr); void compute_stats(); void compute_ci(); + + double get_stat(STATLineType, const std::string &, ConcatString &, int i_alpha=0) const; + double get_stat_mctc(const std::string &, ConcatString &) const; + double get_stat_mcts(const std::string &, int i_alpha=0) const; }; //////////////////////////////////////////////////////////////////////// @@ -188,11 +197,13 @@ class CNTInfo { int n_ranks, frank_ties, orank_ties; + void zero_out(); void clear(); + void allocate_n_alpha(int); void compute_ci(); - double get_stat(const char *); + double get_stat(const std::string &, int i_alpha=0) const; }; //////////////////////////////////////////////////////////////////////// @@ -239,6 +250,13 @@ class SL1L2Info { void zero_out(); void clear(); + + void set_stat_sl1l2(const std::string &, double); + void set_stat_sal1l2(const std::string &, double); + + double get_stat(STATLineType, const std::string &) const; + double get_stat_sl1l2(const std::string &) const; + double get_stat_sal1l2(const std::string &) const; }; //////////////////////////////////////////////////////////////////////// @@ -354,15 +372,17 @@ class VL1L2Info { // Compute sums void set(const PairDataPoint &, const PairDataPoint &); - - void clear(); + void zero_out(); + void clear(); void allocate_n_alpha(int); void compute_stats(); void compute_ci(); - double get_stat(const char *); + double get_stat_vl1l2(const std::string &) const; + 
double get_stat_val1l2(const std::string &) const; + double get_stat_vcnt(const std::string &) const; }; //////////////////////////////////////////////////////////////////////// @@ -502,8 +522,9 @@ class ISCInfo { double baser; double fbias; - void clear(); void zero_out(); + void clear(); + void allocate_n_scale(int); void compute_isc(); void compute_isc(int); @@ -558,6 +579,12 @@ class PCTInfo { void set_fthresh(const ThreshArray &); void compute_stats(); void compute_ci(); + + double get_stat(STATLineType, const std::string &, ConcatString &, int i_alpha=0) const; + double get_stat_pct(const std::string &, ConcatString &) const; + double get_stat_pjc(const std::string &, ConcatString &) const; + double get_stat_prc(const std::string &, ConcatString &) const; + double get_stat_pstd(const std::string &, ConcatString &, int i_alpha=0) const; }; //////////////////////////////////////////////////////////////////////// @@ -737,6 +764,8 @@ extern double compute_ufss(double); extern int compute_rank(const DataPlane &, DataPlane &, double *, int &); +extern bool is_ci_stat_name(const std::string &); + //////////////////////////////////////////////////////////////////////// #endif // __MET_STATS_H__ diff --git a/src/tools/core/series_analysis/series_analysis.cc b/src/tools/core/series_analysis/series_analysis.cc index a2e8cdf5c1..79b46710fe 100644 --- a/src/tools/core/series_analysis/series_analysis.cc +++ b/src/tools/core/series_analysis/series_analysis.cc @@ -36,10 +36,10 @@ // 015 10/03/22 Presotpnik MET #2227 Remove namespace netCDF from header files. // 016 01/29/24 Halley Gotway MET #2801 Configure time difference warnings. // 017 07/05/24 Halley Gotway MET #2924 Support forecast climatology. +// 018 07/26/24 Halley Gotway MET #1371 Aggregate previous output. 
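+//
+// Illustrative sketch only, with hypothetical file names: per MET #1371,
+// a previous Series-Analysis output file might be aggregated together
+// with new matched pairs via the new "-aggr" option, for example:
+//
+//    series_analysis -fcst fcst_file_list -obs obs_file_list \
+//       -aggr prev_series_analysis_out.nc \
+//       -out updated_series_analysis_out.nc \
+//       -config SeriesAnalysisConfig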
// //////////////////////////////////////////////////////////////////////// - #include #include #include @@ -65,7 +65,6 @@ using namespace std; using namespace netCDF; - //////////////////////////////////////////////////////////////////////// static void process_command_line(int, char **); @@ -84,32 +83,69 @@ static void get_series_entry(int, VarInfo *, const StringArray &, static bool read_single_entry(VarInfo *, const ConcatString &, const GrdFileType, DataPlane &, Grid &); +static void open_aggr_file(); +static DataPlane read_aggr_data_plane(const ConcatString &, + const char *suggestion=nullptr); + static void process_scores(); -static void do_cts (int, const PairDataPoint *); -static void do_mcts (int, const PairDataPoint *); -static void do_cnt (int, const PairDataPoint *); -static void do_sl1l2 (int, const PairDataPoint *); -static void do_pct (int, const PairDataPoint *); - -static void store_stat_fho (int, const ConcatString &, const CTSInfo &); -static void store_stat_ctc (int, const ConcatString &, const CTSInfo &); -static void store_stat_cts (int, const ConcatString &, const CTSInfo &); -static void store_stat_mctc (int, const ConcatString &, const MCTSInfo &); -static void store_stat_mcts (int, const ConcatString &, const MCTSInfo &); -static void store_stat_cnt (int, const ConcatString &, const CNTInfo &); -static void store_stat_sl1l2 (int, const ConcatString &, const SL1L2Info &); -static void store_stat_sal1l2(int, const ConcatString &, const SL1L2Info &); -static void store_stat_pct (int, const ConcatString &, const PCTInfo &); -static void store_stat_pstd (int, const ConcatString &, const PCTInfo &); -static void store_stat_pjc (int, const ConcatString &, const PCTInfo &); -static void store_stat_prc (int, const ConcatString &, const PCTInfo &); +static void do_categorical (int, const PairDataPoint *); +static void do_multicategory (int, const PairDataPoint *); +static void do_continuous (int, const PairDataPoint *); +static void do_partialsums (int, const PairDataPoint *); +static void do_probabilistic (int, const PairDataPoint *); +static void do_climo_brier (int, double, int, PCTInfo &); + +static int read_aggr_total (int); +static void read_aggr_ctc (int, const CTSInfo &, CTSInfo &); +static void read_aggr_mctc (int, const MCTSInfo &, MCTSInfo &); +static void read_aggr_sl1l2 (int, const SL1L2Info &, SL1L2Info &); +static void read_aggr_sal1l2 (int, const SL1L2Info &, SL1L2Info &); +static void read_aggr_pct (int, const PCTInfo &, PCTInfo &); + +static void store_stat_categorical(int, + STATLineType, const ConcatString &, + const CTSInfo &); +static void store_stat_multicategory(int, + STATLineType, const ConcatString &, + const MCTSInfo &); +static void store_stat_partialsums(int, + STATLineType, const ConcatString &, + const SL1L2Info &); +static void store_stat_continuous(int, + STATLineType, const ConcatString &, + const CNTInfo &); +static void store_stat_probabilistic(int, + STATLineType, const ConcatString &, + const PCTInfo &); + +static void store_stat_all_ctc (int, const CTSInfo &); +static void store_stat_all_mctc (int, const MCTSInfo &); +static void store_stat_all_sl1l2 (int, const SL1L2Info &); +static void store_stat_all_sal1l2(int, const SL1L2Info &); +static void store_stat_all_pct (int, const PCTInfo &); + +static ConcatString build_nc_var_name_categorical( + STATLineType, const ConcatString &, + const CTSInfo &, double); +static ConcatString build_nc_var_name_multicategory( + STATLineType, const ConcatString &, + double); +static ConcatString 
build_nc_var_name_partialsums( + STATLineType, const ConcatString &, + const SL1L2Info &); +static ConcatString build_nc_var_name_continuous( + STATLineType, const ConcatString &, + const CNTInfo &, double); +static ConcatString build_nc_var_name_probabilistic( + STATLineType, const ConcatString &, + const PCTInfo &, double); static void setup_nc_file(const VarInfo *, const VarInfo *); -static void add_nc_var(const ConcatString &, const ConcatString &, - const ConcatString &, const ConcatString &, - const ConcatString &, double); -static void put_nc_val(int, const ConcatString &, float); +static void add_stat_data(const ConcatString &, const ConcatString &, + const ConcatString &, const ConcatString &, + const ConcatString &, double); +static void write_stat_data(); static void set_range(const unixtime &, unixtime &, unixtime &); static void set_range(const int &, int &, int &); @@ -120,6 +156,7 @@ static void usage(); static void set_fcst_files(const StringArray &); static void set_obs_files(const StringArray &); static void set_both_files(const StringArray &); +static void set_aggr(const StringArray &); static void set_paired(const StringArray &); static void set_out_file(const StringArray &); static void set_config_file(const StringArray &); @@ -166,12 +203,13 @@ void process_command_line(int argc, char **argv) { cline.set_usage(usage); // Add the options function calls - cline.add(set_fcst_files, "-fcst", -1); - cline.add(set_obs_files, "-obs", -1); - cline.add(set_both_files, "-both", -1); - cline.add(set_paired, "-paired", 0); - cline.add(set_config_file, "-config", 1); - cline.add(set_out_file, "-out", 1); + cline.add(set_fcst_files, "-fcst", -1); + cline.add(set_obs_files, "-obs", -1); + cline.add(set_both_files, "-both", -1); + cline.add(set_aggr, "-aggr", 1); + cline.add(set_paired, "-paired", 0); + cline.add(set_config_file, "-config", 1); + cline.add(set_out_file, "-out", 1); cline.add(set_compress, "-compress", 1); // Parse the command line @@ -180,36 +218,43 @@ void process_command_line(int argc, char **argv) { // Check for error. There should be zero arguments left. if(cline.n() != 0) usage(); - // Warn about log output + // Recommend logging verbosity level of 3 or less if(mlog.verbosity_level() >= 3) { - mlog << Warning << "\nRunning Series-Analysis at verbosity >= 3 " + mlog << Debug(3) << "Running Series-Analysis at verbosity >= 3 " << "produces excessive log output and can slow the runtime " - << "considerably.\n\n"; + << "considerably.\n"; } // Check that the required arguments have been set. 
if(fcst_files.n() == 0) { mlog << Error << "\nprocess_command_line() -> " << "the forecast file list must be set using the " - << "\"-fcst\" or \"-both\" option.\n\n"; + << R"("-fcst" or "-both" option.)" << "\n\n"; usage(); } if(obs_files.n() == 0) { mlog << Error << "\nprocess_command_line() -> " << "the observation file list must be set using the " - << "\"-obs\" or \"-both\" option.\n\n"; + << R"("-obs" or "-both" option.)" << "\n\n"; usage(); } if(config_file.length() == 0) { mlog << Error << "\nprocess_command_line() -> " << "the configuration file must be set using the " - << "\"-config\" option.\n\n"; + << R"("-config" option.)" << "\n\n"; usage(); } if(out_file.length() == 0) { mlog << Error << "\nprocess_command_line() -> " << "the output NetCDF file must be set using the " - << "\"-out\" option.\n\n"; + << R"("-out" option.)" << "\n\n"; + usage(); + } + if(aggr_file == out_file) { + mlog << Error << "\nprocess_command_line() -> " + << R"(the "-out" and "-aggr" options cannot be )" + << R"(set to the same file (")" << aggr_file + << R"(")!)" << "\n\n"; usage(); } @@ -249,9 +294,9 @@ void process_command_line(int argc, char **argv) { // List the lengths of the series options mlog << Debug(1) - << "Length of configuration \"fcst.field\" = " + << R"(Length of configuration "fcst.field" = )" << conf_info.get_n_fcst() << "\n" - << "Length of configuration \"obs.field\" = " + << R"(Length of configuration "obs.field" = )" << conf_info.get_n_obs() << "\n" << "Length of forecast file list = " << fcst_files.n() << "\n" @@ -266,38 +311,38 @@ void process_command_line(int argc, char **argv) { // - Observation file list if(conf_info.get_n_fcst() > 1) { series_type = SeriesType::Fcst_Conf; - n_series = conf_info.get_n_fcst(); + n_series_pair = conf_info.get_n_fcst(); mlog << Debug(1) - << "Series defined by the \"fcst.field\" configuration entry " - << "of length " << n_series << ".\n"; + << R"(Series defined by the "fcst.field" configuration entry )" + << "of length " << n_series_pair << ".\n"; } else if(conf_info.get_n_obs() > 1) { series_type = SeriesType::Obs_Conf; - n_series = conf_info.get_n_obs(); + n_series_pair = conf_info.get_n_obs(); mlog << Debug(1) - << "Series defined by the \"obs.field\" configuration entry " - << "of length " << n_series << ".\n"; + << R"(Series defined by the "obs.field" configuration entry )" + << "of length " << n_series_pair << ".\n"; } else if(fcst_files.n() > 1) { series_type = SeriesType::Fcst_Files; - n_series = fcst_files.n(); + n_series_pair = fcst_files.n(); mlog << Debug(1) << "Series defined by the forecast file list of length " - << n_series << ".\n"; + << n_series_pair << ".\n"; } else if(obs_files.n() > 1) { series_type = SeriesType::Obs_Files; - n_series = obs_files.n(); + n_series_pair = obs_files.n(); mlog << Debug(1) << "Series defined by the observation file list of length " - << n_series << ".\n"; + << n_series_pair << ".\n"; } else { series_type = SeriesType::Fcst_Conf; - n_series = 1; + n_series_pair = 1; mlog << Debug(1) - << "The \"fcst.field\" and \"obs.field\" configuration entries " - << "and the \"-fcst\" and \"-obs\" command line options " + << R"(The "fcst.field" and "obs.field" configuration entries )" + << R"(and the "-fcst" and "-obs" command line options )" << "all have length one.\n"; } @@ -307,7 +352,7 @@ void process_command_line(int argc, char **argv) { // The number of forecast and observation files must match. 
if(fcst_files.n() != obs_files.n()) { mlog << Error << "\nprocess_command_line() -> " - << "when using the \"-paired\" command line option, the " + << R"(when using the "-paired" command line option, the )" << "number of forecast (" << fcst_files.n() << ") and observation (" << obs_files.n() << ") files must match.\n\n"; @@ -315,24 +360,24 @@ void process_command_line(int argc, char **argv) { } // The number of files must match the series length. - if(fcst_files.n() != n_series) { + if(fcst_files.n() != n_series_pair) { mlog << Error << "\nprocess_command_line() -> " - << "when using the \"-paired\" command line option, the " + << R"(when using the "-paired" command line option, the )" << "the file list length (" << fcst_files.n() - << ") and series length (" << n_series + << ") and series length (" << n_series_pair << ") must match.\n\n"; usage(); } // Set the series file names to the input file lists - for(i=0; iregrid(), &fcst_grid, &obs_grid); - nxy = grid.nx() * grid.ny(); // Process masking regions conf_info.process_masks(grid); // Set the block size, if needed - if(is_bad_data(conf_info.block_size)) conf_info.block_size = nxy; + if(is_bad_data(conf_info.block_size)) { + conf_info.block_size = grid.nxy(); + } // Compute the number of reads required - n_reads = nint(ceil((double) nxy / conf_info.block_size)); + n_reads = nint(ceil((double) grid.nxy() / conf_info.block_size)); mlog << Debug(2) << "Computing statistics using a block size of " @@ -371,7 +417,7 @@ void process_grid(const Grid &fcst_grid, const Grid &obs_grid) { << "\nA block size of " << conf_info.block_size << " for a " << grid.nx() << " x " << grid.ny() << " grid requires " << n_reads << " passes through the data which will be slow.\n" - << "Consider increasing \"block_size\" in the configuration " + << R"(Consider increasing "block_size" in the configuration )" << "file based on available memory.\n\n"; } @@ -398,8 +444,8 @@ Met2dDataFile *get_mtddf(const StringArray &file_list, // Read first valid file if(!(mtddf = mtddf_factory.new_met_2d_data_file(file_list[i].c_str(), type))) { - mlog << Error << "\nTrouble reading data file \"" - << file_list[i] << "\"\n\n"; + mlog << Error << "\nTrouble reading data file: " + << file_list[i] << "\n\n"; exit(1); } @@ -421,7 +467,7 @@ void get_series_data(int i_series, mlog << Debug(2) << "Processing series entry " << i_series + 1 << " of " - << n_series << ": " << fcst_info->magic_str() + << n_series_pair << ": " << fcst_info->magic_str() << " versus " << obs_info->magic_str() << "\n"; // Switch on the series type @@ -499,7 +545,7 @@ void get_series_data(int i_series, } // Setup the verification grid - if(nxy == 0) process_grid(fcst_grid, obs_grid); + if(!grid.is_set()) process_grid(fcst_grid, obs_grid); // Regrid the forecast, if necessary if(!(fcst_grid == grid)) { @@ -510,12 +556,12 @@ void get_series_data(int i_series, << "disabled:\n" << fcst_grid.serialize() << " !=\n" << grid.serialize() << "\nSpecify regridding logic in the config file " - << "\"regrid\" section.\n\n"; + << R"("regrid" section.)" << "\n\n"; exit(1); } - mlog << Debug(1) - << "Regridding field " << fcst_info->magic_str() + mlog << Debug(2) + << "Regridding forecast " << fcst_info->magic_str() << " to the verification grid.\n"; fcst_dp = met_regrid(fcst_dp, fcst_grid, grid, fcst_info->regrid()); @@ -530,12 +576,12 @@ void get_series_data(int i_series, << "disabled:\n" << obs_grid.serialize() << " !=\n" << grid.serialize() << "\nSpecify regridding logic in the config file " - << "\"regrid\" section.\n\n"; + << 
R"("regrid" section.)" << "\n\n"; exit(1); } - mlog << Debug(1) - << "Regridding field " << obs_info->magic_str() + mlog << Debug(2) + << "Regridding observation " << obs_info->magic_str() << " to the verification grid.\n"; obs_dp = met_regrid(obs_dp, obs_grid, grid, obs_info->regrid()); @@ -564,7 +610,8 @@ void get_series_data(int i_series, << cs << "\n\n"; } else { - mlog << Debug(3) << cs << "\n"; + mlog << Debug(3) + << cs << "\n"; } } @@ -578,7 +625,6 @@ void get_series_entry(int i_series, VarInfo *info, const GrdFileType type, StringArray &found_files, DataPlane &dp, Grid &cur_grid) { - int i, j; bool found = false; // Initialize @@ -588,10 +634,10 @@ void get_series_entry(int i_series, VarInfo *info, if(found_files[i_series].length() == 0) { // Loop through the file list - for(i=0; i " << "Could not find data for " << info->magic_str() << " in file list:\n"; - for(i=0; i " + << "unable to open the aggregate NetCDF file: " + << aggr_file << "\n\n"; + exit(1); + } + + // Update timing info based on aggregate file global attributes + ConcatString cs; + + if(get_att_value_string(aggr_nc.MetNc->Nc, "fcst_init_beg", cs)) { + set_range(timestring_to_unix(cs.c_str()), fcst_init_beg, fcst_init_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "fcst_init_end", cs)) { + set_range(timestring_to_unix(cs.c_str()), fcst_init_beg, fcst_init_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "fcst_valid_beg", cs)) { + set_range(timestring_to_unix(cs.c_str()), fcst_valid_beg, fcst_valid_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "fcst_valid_end", cs)) { + set_range(timestring_to_unix(cs.c_str()), fcst_valid_beg, fcst_valid_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "fcst_lead_beg", cs)) { + set_range(timestring_to_sec(cs.c_str()), fcst_lead_beg, fcst_lead_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "fcst_lead_end", cs)) { + set_range(timestring_to_sec(cs.c_str()), fcst_lead_beg, fcst_lead_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "obs_init_beg", cs)) { + set_range(timestring_to_unix(cs.c_str()), obs_init_beg, obs_init_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "obs_init_end", cs)) { + set_range(timestring_to_unix(cs.c_str()), obs_init_beg, obs_init_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "obs_valid_beg", cs)) { + set_range(timestring_to_unix(cs.c_str()), obs_valid_beg, obs_valid_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "obs_valid_end", cs)) { + set_range(timestring_to_unix(cs.c_str()), obs_valid_beg, obs_valid_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "obs_lead_beg", cs)) { + set_range(timestring_to_sec(cs.c_str()), obs_lead_beg, obs_lead_end); + } + if(get_att_value_string(aggr_nc.MetNc->Nc, "obs_lead_end", cs)) { + set_range(timestring_to_sec(cs.c_str()), obs_lead_beg, obs_lead_end); + } + + // Store the aggregate series length + n_series_aggr = get_int_var(aggr_nc.MetNc->Nc, n_series_var_name, 0); + + mlog << Debug(3) + << "Aggregation series has length " << n_series_aggr << ".\n"; + + return; +} + +//////////////////////////////////////////////////////////////////////// + +DataPlane read_aggr_data_plane(const ConcatString &var_name, + const char *suggestion) { + DataPlane aggr_dp; + + // Setup the data request + VarInfoNcMet aggr_info; + aggr_info.set_magic(var_name, "(*,*)"); + + mlog << Debug(2) + << R"(Reading aggregation ")" + << aggr_info.magic_str() + << R"(" field.)" << "\n"; + + // Attempt to read the gridded data from the current file + if(!aggr_nc.data_plane(aggr_info, 
aggr_dp)) { + mlog << Error << "\nread_aggr_data_plane() -> " + << R"(Required variable ")" << aggr_info.magic_str() + << R"(" not found in the aggregate file!)" << "\n\n"; + if(suggestion) { + mlog << Error + << R"(Recommend recreating ")" << aggr_file + << R"(" to request that )" << suggestion + << " column(s) be written.\n\n"; + } + exit(1); + } + + // Check that the grid has not changed + if(aggr_nc.grid().nx() != grid.nx() || + aggr_nc.grid().ny() != grid.ny()) { + mlog << Error << "\nread_aggr_data_plane() -> " + << "the input grid dimensions (" << grid.nx() << ", " << grid.ny() + << ") and aggregate grid dimensions (" << aggr_nc.grid().nx() + << ", " << aggr_nc.grid().ny() << ") do not match!\n\n"; + exit(1); + } + + return aggr_dp; +} + +//////////////////////////////////////////////////////////////////////// + void process_scores() { - int i, x, y, i_read, i_series, i_point, i_fcst; + int x; + int y; + int i_point = 0; VarInfo *fcst_info = (VarInfo *) nullptr; VarInfo *obs_info = (VarInfo *) nullptr; - PairDataPoint *pd_ptr = (PairDataPoint *) nullptr; - DataPlane fcst_dp, obs_dp; + DataPlane fcst_dp; + DataPlane obs_dp; + vector<PairDataPoint> pd_block; const char *method_name = "process_scores() "; // Climatology mean and standard deviation DataPlane fcmn_dp, fcsd_dp; DataPlane ocmn_dp, ocsd_dp; + // Open the aggregate file, if needed + if(aggr_file.nonempty()) open_aggr_file(); + // Number of points skipped due to valid data threshold - int n_skip_zero = 0; - int n_skip_pos = 0; + int n_skip_zero_vld = 0; + int n_skip_some_vld = 0; // Loop over the data reads - for(i_read=0; i_read 1 ? i_series : 0); + int i_fcst = (conf_info.get_n_fcst() > 1 ? i_series : 0); // Store the current VarInfo objects fcst_info = conf_info.fcst_info[i_fcst]; @@ -715,18 +871,20 @@ void process_scores() { // Retrieve the data planes for the current series entry get_series_data(i_series, fcst_info, obs_info, fcst_dp, obs_dp); - // Allocate PairDataPoint objects, if needed - if(!pd_ptr) { - pd_ptr = new PairDataPoint [conf_info.block_size]; - for(i=0; imagic_str() << ", found " - << (fcmn_flag ? 0 : 1) << " forecast climatology mean and " - << (fcsd_flag ? 0 : 1) << " standard deviation field(s), and " - << (ocmn_flag ? 0 : 1) << " observation climatology mean and " - << (ocsd_flag ? 0 : 1) << " standard deviation field(s).\n"; + << (fcmn_flag ? 1 : 0) << " forecast climatology mean and " + << (fcsd_flag ? 1 : 0) << " standard deviation field(s), and " + << (ocmn_flag ? 1 : 0) << " observation climatology mean and " + << (ocsd_flag ? 
1 : 0) << " standard deviation field(s).\n"; // Setup the output NetCDF file on the first pass - if(nc_out == (NcFile *) 0) setup_nc_file(fcst_info, obs_info); + if(!nc_out) setup_nc_file(fcst_info, obs_info); // Update timing info set_range(fcst_dp.init(), fcst_init_beg, fcst_init_end); @@ -786,7 +940,7 @@ void process_scores() { set_range(obs_dp.lead(), obs_lead_beg, obs_lead_end); // Store matched pairs for each grid point - for(i=0; i 0) { - do_cts(i_point+i, &pd_ptr[i]); + do_categorical(i_point+i, &pd_block[i]); } // Compute multi-category contingency table counts and statistics if(!conf_info.fcst_info[0]->is_prob() && (conf_info.output_stats[STATLineType::mctc].n() + conf_info.output_stats[STATLineType::mcts].n()) > 0) { - do_mcts(i_point+i, &pd_ptr[i]); + do_multicategory(i_point+i, &pd_block[i]); } // Compute continuous statistics if(!conf_info.fcst_info[0]->is_prob() && conf_info.output_stats[STATLineType::cnt].n() > 0) { - do_cnt(i_point+i, &pd_ptr[i]); + do_continuous(i_point+i, &pd_block[i]); } // Compute partial sums if(!conf_info.fcst_info[0]->is_prob() && (conf_info.output_stats[STATLineType::sl1l2].n() + conf_info.output_stats[STATLineType::sal1l2].n()) > 0) { - do_sl1l2(i_point+i, &pd_ptr[i]); + do_partialsums(i_point+i, &pd_block[i]); } // Compute probabilistics counts and statistics @@ -878,22 +1031,16 @@ void process_scores() { conf_info.output_stats[STATLineType::pstd].n() + conf_info.output_stats[STATLineType::pjc].n() + conf_info.output_stats[STATLineType::prc].n()) > 0) { - do_pct(i_point+i, &pd_ptr[i]); + do_probabilistic(i_point+i, &pd_block[i]); } - } // end for i - // Erase the data - for(i=0; i 0 && conf_info.vld_data_thresh == 1.0) { + if(n_skip_some_vld > 0 && conf_info.vld_data_thresh == 1.0) { mlog << Debug(2) << "Some points skipped due to missing data:\n" - << "Consider decreasing \"vld_thresh\" in the config file " + << R"(Consider decreasing "vld_thresh" in the config file )" << "to include more points.\n" - << "Consider requesting \"TOTAL\" from \"output_stats\" " + << R"(Consider requesting "TOTAL" from "output_stats" )" << "in the config file to see the valid data counts.\n"; } @@ -936,30 +1080,56 @@ void process_scores() { //////////////////////////////////////////////////////////////////////// -void do_cts(int n, const PairDataPoint *pd_ptr) { - int i, j; +void do_categorical(int n, const PairDataPoint *pd_ptr) { - mlog << Debug(4) << "Computing Categorical Statistics.\n"; + mlog << Debug(4) + << "Computing Categorical Statistics.\n"; // Allocate objects to store categorical statistics int n_cts = conf_info.fcat_ta.n(); CTSInfo *cts_info = new CTSInfo [n_cts]; // Setup CTSInfo objects - for(i=0; in_obs-1); + + // Loop over the thresholds + for(int i=0; in_obs-1); + + // Compute the current MCTSInfo + compute_mctsinfo(*pd_ptr, i_na, false, false, mcts_info); + + // Read the MCTC data to be aggregated + MCTSInfo aggr_mcts; + read_aggr_mctc(n, mcts_info, aggr_mcts); + + // Aggregate MCTC counts + mcts_info.cts += aggr_mcts.cts; + + // Compute statistics and confidence intervals + mcts_info.compute_stats(); + mcts_info.compute_ci(); + + } // Compute the counts, stats, normal confidence intervals, and // bootstrap confidence intervals - if(conf_info.boot_interval == BootIntervalType::BCA) { + else if(conf_info.boot_interval == BootIntervalType::BCA) { compute_mcts_stats_ci_bca(rng_ptr, *pd_ptr, conf_info.n_boot_rep, mcts_info, true, @@ -1037,15 +1232,17 @@ void do_mcts(int n, const PairDataPoint *pd_ptr) { } // Add statistic value for each possible 
MCTC column - for(i=0; isubset_pairs_cnt_thresh(cnt_info.fthresh, cnt_info.othresh, - cnt_info.logic); - - // Check for no matched pairs to process - if(pd.n_obs == 0) continue; - - // Compute the stats, normal confidence intervals, and - // bootstrap confidence intervals - int precip_flag = (conf_info.fcst_info[0]->is_precipitation() && - conf_info.obs_info[0]->is_precipitation()); + // Aggregate input pair data with existing partial sums + if(aggr_file.nonempty()) { + + // Compute partial sums from the pair data + SL1L2Info s_info; + s_info.fthresh = cnt_info.fthresh; + s_info.othresh = cnt_info.othresh; + s_info.logic = cnt_info.logic; + s_info.set(*pd_ptr); + + // Aggregate scalar partial sums + SL1L2Info aggr_psum; + read_aggr_sl1l2(n, s_info, aggr_psum); + s_info += aggr_psum; + + // Aggregate scalar anomaly partial sums + if(conf_info.output_stats[STATLineType::cnt].has("ANOM_CORR")) { + read_aggr_sal1l2(n, s_info, aggr_psum); + s_info += aggr_psum; + } - if(conf_info.boot_interval == BootIntervalType::BCA) { - compute_cnt_stats_ci_bca(rng_ptr, pd, - precip_flag, conf_info.rank_corr_flag, - conf_info.n_boot_rep, - cnt_info, conf_info.tmp_dir.c_str()); + // Compute continuous statistics from partial sums + compute_cntinfo(s_info, cnt_info); } + // Compute continuous statistics from the pair data else { - compute_cnt_stats_ci_perc(rng_ptr, pd, - precip_flag, conf_info.rank_corr_flag, - conf_info.n_boot_rep, conf_info.boot_rep_prop, - cnt_info, conf_info.tmp_dir.c_str()); + + // Apply continuous filtering thresholds to subset pairs + pd = pd_ptr->subset_pairs_cnt_thresh(cnt_info.fthresh, cnt_info.othresh, + cnt_info.logic); + + // Check for no matched pairs to process + if(pd.n_obs == 0) continue; + + // Compute the stats, normal confidence intervals, and + // bootstrap confidence intervals + int precip_flag = (conf_info.fcst_info[0]->is_precipitation() && + conf_info.obs_info[0]->is_precipitation()); + + if(conf_info.boot_interval == BootIntervalType::BCA) { + compute_cnt_stats_ci_bca(rng_ptr, pd, + precip_flag, conf_info.rank_corr_flag, + conf_info.n_boot_rep, + cnt_info, conf_info.tmp_dir.c_str()); + } + else { + compute_cnt_stats_ci_perc(rng_ptr, pd, + precip_flag, conf_info.rank_corr_flag, + conf_info.n_boot_rep, conf_info.boot_rep_prop, + cnt_info, conf_info.tmp_dir.c_str()); + } } // Add statistic value for each possible CNT column - for(j=0; j 0) { + read_aggr_sl1l2(n, s_info, aggr_psum); + s_info += aggr_psum; + } + + // Aggregate SAL1L2 partial sums + if(conf_info.output_stats[STATLineType::sal1l2].n() > 0) { + read_aggr_sal1l2(n, s_info, aggr_psum); + s_info += aggr_psum; + } + } + // Add statistic value for each possible SL1L2 column - for(j=0; jNc, &aggr_var_names); - // Setup the PCTInfo object - pct_info.fthresh = conf_info.fcat_ta; - pct_info.allocate_n_alpha(conf_info.ci_alpha.n()); + // Search for one containing TOTAL + for(int i=0; i " + << R"(No variable containing ")" << total_name + << R"(" "found in the aggregate file!)" << "\n\n"; + exit(1); + } } - // Compute PCTInfo for each observation threshold - for(i=0; i " + << "the number of MCTC categories do not match (" + << nint(v) << " != " << aggr_mcts.cts.nrows() << ")!\n\n"; + exit(1); } - } // end for i + // Check the expected correct + else if(c == "EC_VALUE" && !is_bad_data(v) && + !is_eq(v, aggr_mcts.cts.ec_value(), loose_tol)) { + mlog << Error << "\nread_aggr_mctc() -> " + << "the MCTC expected correct values do not match (" + << v << " != " << aggr_mcts.cts.ec_value() << ")!\n\n"; + exit(1); + } + // 
Populate the MCTC table + else if(check_reg_exp("F[0-9]*_O[0-9]*", c.c_str())) { + StringArray sa(c.split("_")); + int i_row = atoi(sa[0].c_str()+1) - 1; + int i_col = atoi(sa[1].c_str()+1) - 1; + aggr_mcts.cts.set_entry(i_row, i_col, nint(v)); + } + } return; } //////////////////////////////////////////////////////////////////////// -void store_stat_fho(int n, const ConcatString &col, - const CTSInfo &cts_info) { - double v; - ConcatString lty_stat, var_name; +void read_aggr_sl1l2(int n, const SL1L2Info &s_info, + SL1L2Info &aggr_psum) { - // Set the column name to all upper case - ConcatString c = to_upper(col); + // Initialize + aggr_psum.zero_out(); - // Get the column value - if(c == "TOTAL") { v = (double) cts_info.cts.n(); } - else if(c == "F_RATE") { v = cts_info.cts.f_rate(); } - else if(c == "H_RATE") { v = cts_info.cts.h_rate(); } - else if(c == "O_RATE") { v = cts_info.cts.o_rate(); } - else { - mlog << Error << "\nstore_stat_fho() -> " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); - } + // Loop over the SL1L2 columns + for(auto &col : sl1l2_columns) { - // Construct the NetCDF variable name - var_name << cs_erase << "series_fho_" << c; + ConcatString c(to_upper(col)); + ConcatString var_name(build_nc_var_name_partialsums( + STATLineType::sl1l2, c, + s_info)); - // Append threshold information - if(cts_info.fthresh == cts_info.othresh) { - var_name << "_" << cts_info.fthresh.get_abbr_str(); - } - else { - var_name << "_fcst" << cts_info.fthresh.get_abbr_str() - << "_obs" << cts_info.othresh.get_abbr_str(); + // Read aggregate data, if needed + if(aggr_data.count(var_name) == 0) { + aggr_data[var_name] = read_aggr_data_plane( + var_name, R"("ALL" SL1L2)"); + } + + // Populate the partial sums + aggr_psum.set_stat_sl1l2(col, aggr_data[var_name].buf()[n]); } - // Add map for this variable name - if(stat_data.count(var_name) == 0) { + return; +} - // Build key - lty_stat << "FHO_" << c; +//////////////////////////////////////////////////////////////////////// - // Add new map entry - add_nc_var(var_name, c, stat_long_name[lty_stat], - cts_info.fthresh.get_str(), - cts_info.othresh.get_str(), - bad_data_double); - } +void read_aggr_sal1l2(int n, const SL1L2Info &s_info, + SL1L2Info &aggr_psum) { - // Store the statistic value - put_nc_val(n, var_name, (float) v); + // Initialize + aggr_psum.zero_out(); + + // Loop over the SAL1L2 columns + for(auto &col : sal1l2_columns) { + + ConcatString c(to_upper(col)); + ConcatString var_name(build_nc_var_name_partialsums( + STATLineType::sal1l2, c, + s_info)); + + // Read aggregate data, if needed + if(aggr_data.count(var_name) == 0) { + aggr_data[var_name] = read_aggr_data_plane( + var_name, R"("ALL" SAL1L2)"); + } + + // Populate the partial sums + aggr_psum.set_stat_sal1l2(col, aggr_data[var_name].buf()[n]); + } return; } //////////////////////////////////////////////////////////////////////// -void store_stat_ctc(int n, const ConcatString &col, - const CTSInfo &cts_info) { - int v; - ConcatString lty_stat, var_name; +void read_aggr_pct(int n, const PCTInfo &pct_info, + PCTInfo &aggr_pct) { - // Set the column name to all upper case - ConcatString c = to_upper(col); + // Initialize + aggr_pct.pct = pct_info.pct; + aggr_pct.pct.zero_out(); - // Get the column value - if(c == "TOTAL") { v = cts_info.cts.n(); } - else if(c == "FY_OY") { v = cts_info.cts.fy_oy(); } - else if(c == "FY_ON") { v = cts_info.cts.fy_on(); } - else if(c == "FN_OY") { v = cts_info.cts.fn_oy(); } - else if(c == "FN_ON") { v = 
cts_info.cts.fn_on(); } - else if(c == "EC_VALUE") { v = cts_info.cts.ec_value(); } - else { - mlog << Error << "\nstore_stat_ctc() -> " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); - } + // Get PCT column names + StringArray pct_cols(get_pct_columns(aggr_pct.pct.nrows()+1)); - // Construct the NetCDF variable name - var_name << cs_erase << "series_ctc_" << c; + // Loop over the PCT columns + for(int i=0; i " + << "the number of PCT thresholds do not match (" + << nint(v) << " != " << aggr_pct.pct.nrows()+1 + << ")!\n\n"; + exit(1); + } + // Set the event counts + else if(check_reg_exp("OY_[0-9]", c.c_str())) { - // Store the statistic value - put_nc_val(n, var_name, (float) v); + // Parse the index value from the column name + int i_row = atoi(strrchr(c.c_str(), '_') + 1) - 1; + aggr_pct.pct.set_event(i_row, nint(v)); + } + // Set the non-event counts + else if(check_reg_exp("ON_[0-9]", c.c_str())) { + + // Parse the index value from the column name + int i_row = atoi(strrchr(c.c_str(), '_') + 1) - 1; + aggr_pct.pct.set_nonevent(i_row, nint(v)); + } + } return; } //////////////////////////////////////////////////////////////////////// -void store_stat_cts(int n, const ConcatString &col, - const CTSInfo &cts_info) { - int i; - double v; - ConcatString lty_stat, var_name; - int n_ci = 1; +void do_probabilistic(int n, const PairDataPoint *pd_ptr) { - // Set the column name to all upper case - ConcatString c = to_upper(col); + mlog << Debug(4) + << "Computing Probabilistic Statistics.\n"; - // Check for columns with normal or bootstrap confidence limits - if(strstr(c.c_str(), "_NC") || strstr(c.c_str(), "_BC")) n_ci = cts_info.n_alpha; - - // Loop over the alpha values, if necessary - for(i=0; i " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); - } + // Object to store probabilistic statistics + PCTInfo pct_info; - // Construct the NetCDF variable name - var_name << cs_erase << "series_cts_" << c; + // Setup the PCTInfo object + pct_info.fthresh = conf_info.fcat_ta; + pct_info.allocate_n_alpha(conf_info.ci_alpha.n()); + + for(int i=0; i 1) var_name << "_a" << cts_info.alpha[i]; - - // Add map for this variable name - if(stat_data.count(var_name) == 0) { - - // Build key - lty_stat << "CTS_" << c; + // Add statistic value for each possible PCT column + for(int j=0; j 1 ? 
cts_info.alpha[i] : bad_data_double)); + // Add statistic value for each possible PSTD column + for(int j=0; j= mcts_info.cts.nrows() || - j < 0 || j >= mcts_info.cts.ncols()) { - mlog << Error << "\nstore_stat_mctc() -> " - << "range check error for column name requested \"" << c - << "\"\n\n"; - exit(1); - } + // Aggregate the climatology brier score as a weighted + // average and recompute the brier skill score - // Retrieve the value - v = (double) mcts_info.cts.entry(i, j); - } - else { - mlog << Error << "\nstore_stat_mctc() -> " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); - } + if(is_bad_data(briercl_pair) || total_pair == 0) return; // Construct the NetCDF variable name - var_name << cs_erase << "series_mctc_" << c; + ConcatString var_name(build_nc_var_name_probabilistic( + STATLineType::pstd, "BRIERCL", + pct_info, bad_data_double)); - // Add map for this variable name - if(stat_data.count(var_name) == 0) { + // Read aggregate data, if needed + if(aggr_data.count(var_name) == 0) { + aggr_data[var_name] = read_aggr_data_plane( + var_name, R"(the "BRIERCL" PSTD)"); + } - // Build key - lty_stat << "MCTC_" << d; + // Get the n-th BRIERCL value + double briercl_aggr = aggr_data[var_name].buf()[n]; + int total_aggr = read_aggr_total(n); - // Add new map entry - add_nc_var(var_name, c, stat_long_name[lty_stat], - mcts_info.fthresh.get_str(","), - mcts_info.othresh.get_str(","), - bad_data_double); - } + // Aggregate BRIERCL as a weighted average + if(!is_bad_data(briercl_pair) && + !is_bad_data(briercl_aggr) && + (total_pair + total_aggr) > 0) { - // Store the statistic value - put_nc_val(n, var_name, (float) v); + pct_info.briercl.v = (total_pair * briercl_pair + + total_aggr * briercl_aggr) / + (total_pair + total_aggr); + + // Compute the brier skill score + if(!is_bad_data(pct_info.brier.v) && + !is_bad_data(pct_info.briercl.v)) { + pct_info.bss = 1.0 - (pct_info.brier.v / pct_info.briercl.v); + } + } return; } //////////////////////////////////////////////////////////////////////// -void store_stat_mcts(int n, const ConcatString &col, - const MCTSInfo &mcts_info) { - int i; - double v; - ConcatString lty_stat, var_name; - int n_ci = 1; +void store_stat_categorical(int n, STATLineType lt, + const ConcatString &col, + const CTSInfo &cts_info) { // Set the column name to all upper case ConcatString c = to_upper(col); + // Handle ALL CTC columns + if(lt == STATLineType::ctc && c == all_columns) { + return store_stat_all_ctc(n, cts_info); + } + // Check for columns with normal or bootstrap confidence limits - if(strstr(c.c_str(), "_NC") || strstr(c.c_str(), "_BC")) n_ci = mcts_info.n_alpha; - - // Loop over the alpha values, if necessary - for(i=0; i " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); - } + int n_alpha = 1; + if(lt == STATLineType::cts && is_ci_stat_name(c)) { + n_alpha = cts_info.n_alpha; + } - // Construct the NetCDF variable name - var_name << cs_erase << "series_mcts_" << c; + // Loop over the alpha values + for(int i_alpha=0; i_alpha 1) var_name << "_a" << mcts_info.alpha[i]; + // Store alpha value + double alpha = (n_alpha > 1 ? 
cts_info.alpha[i_alpha] : bad_data_double); + + // Construct the NetCDF variable name + ConcatString var_name(build_nc_var_name_categorical( + lt, c, cts_info, alpha)); // Add map for this variable name if(stat_data.count(var_name) == 0) { // Build key - lty_stat << "MCTS_" << c; + ConcatString lty_stat(statlinetype_to_string(lt)); + lty_stat << "_" << c; // Add new map entry - add_nc_var(var_name, c, stat_long_name[lty_stat], - mcts_info.fthresh.get_str(","), - mcts_info.othresh.get_str(","), - (n_ci > 1 ? mcts_info.alpha[i] : bad_data_double)); + add_stat_data(var_name, c, stat_long_name[lty_stat], + cts_info.fthresh.get_str(), + cts_info.othresh.get_str(), + alpha); } // Store the statistic value - put_nc_val(n, var_name, (float) v); + stat_data[var_name].dp.buf()[n] = cts_info.get_stat(lt, c, i_alpha); - } // end for i + } // end for i_alpha return; } //////////////////////////////////////////////////////////////////////// -void store_stat_cnt(int n, const ConcatString &col, - const CNTInfo &cnt_info) { - int i; - double v; - ConcatString lty_stat, var_name; - int n_ci = 1; +void store_stat_multicategory(int n, STATLineType lt, + const ConcatString &col, + const MCTSInfo &mcts_info) { // Set the column name to all upper case ConcatString c = to_upper(col); + // Handle ALL MCTC columns + if(lt == STATLineType::mctc && c == all_columns) { + return store_stat_all_mctc(n, mcts_info); + } + // Check for columns with normal or bootstrap confidence limits - if(strstr(c.c_str(), "_NC") || strstr(c.c_str(), "_BC")) n_ci = cnt_info.n_alpha; - - // Loop over the alpha values, if necessary - for(i=0; i " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); - } + int n_alpha = 1; + if(lt == STATLineType::mcts && is_ci_stat_name(c)) { + n_alpha = mcts_info.n_alpha; + } + + // Loop over the alpha values + for(int i_alpha=0; i_alpha 1 ? mcts_info.alpha[i_alpha] : bad_data_double); // Construct the NetCDF variable name - var_name << cs_erase << "series_cnt_" << c; - - // Append threshold information, if supplied - if(cnt_info.fthresh.get_type() != thresh_na || - cnt_info.othresh.get_type() != thresh_na) { - var_name << "_fcst" << cnt_info.fthresh.get_abbr_str() - << "_" << setlogic_to_abbr(conf_info.cnt_logic) - << "_obs" << cnt_info.othresh.get_abbr_str(); - } + ConcatString var_name(build_nc_var_name_multicategory( + lt, c, alpha)); - // Append confidence interval alpha value - if(n_ci > 1) var_name << "_a" << cnt_info.alpha[i]; + // Store the data value + ConcatString col_name; + auto v = (float) mcts_info.get_stat(lt, c, col_name, i_alpha); // Add map for this variable name if(stat_data.count(var_name) == 0) { // Build key - lty_stat << "CNT_" << c; + ConcatString lty_stat; + lty_stat << statlinetype_to_string(lt) << "_" << col_name; // Add new map entry - add_nc_var(var_name, c, stat_long_name[lty_stat], - cnt_info.fthresh.get_str(), - cnt_info.othresh.get_str(), - (n_ci > 1 ? 
cnt_info.alpha[i] : bad_data_double)); + add_stat_data(var_name, c, stat_long_name[lty_stat], + mcts_info.fthresh.get_str(","), + mcts_info.othresh.get_str(","), + alpha); } // Store the statistic value - put_nc_val(n, var_name, (float) v); + stat_data[var_name].dp.buf()[n] = v; - } // end for i + } // end for i_alpha return; } //////////////////////////////////////////////////////////////////////// -void store_stat_sl1l2(int n, const ConcatString &col, - const SL1L2Info &s_info) { - double v; - ConcatString lty_stat, var_name; +void store_stat_continuous(int n, STATLineType lt, + const ConcatString &col, + const CNTInfo &cnt_info) { // Set the column name to all upper case ConcatString c = to_upper(col); - // Get the column value - if(c == "TOTAL") { v = (double) s_info.scount; } - else if(c == "FBAR") { v = s_info.fbar; } - else if(c == "OBAR") { v = s_info.obar; } - else if(c == "FOBAR") { v = s_info.fobar; } - else if(c == "FFBAR") { v = s_info.ffbar; } - else if(c == "OOBAR") { v = s_info.oobar; } - else if(c == "MAE") { v = s_info.smae; } - else { - mlog << Error << "\nstore_stat_sl1l2() -> " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); - } + // Check for columns with normal or bootstrap confidence limits + int n_alpha = 1; + if(is_ci_stat_name(c)) n_alpha = cnt_info.n_alpha; - // Construct the NetCDF variable name - var_name << cs_erase << "series_sl1l2_" << c; + // Loop over the alpha values + for(int i_alpha=0; i_alpha 1 ? cnt_info.alpha[i_alpha] : bad_data_double); - // Add map for this variable name - if(stat_data.count(var_name) == 0) { + // Construct the NetCDF variable name + ConcatString var_name(build_nc_var_name_continuous( + lt, c, cnt_info, alpha)); - // Build key - lty_stat << "SL1L2_" << c; + // Add map for this variable name + if(stat_data.count(var_name) == 0) { - // Add new map entry - add_nc_var(var_name, c, stat_long_name[lty_stat], - s_info.fthresh.get_str(), - s_info.othresh.get_str(), - bad_data_double); - } + // Build key + ConcatString lty_stat(statlinetype_to_string(lt)); + lty_stat << "_" << c; - // Store the statistic value - put_nc_val(n, var_name, (float) v); + // Add new map entry + add_stat_data(var_name, c, stat_long_name[lty_stat], + cnt_info.fthresh.get_str(), + cnt_info.othresh.get_str(), + alpha); + } + + // Store the statistic value + stat_data[var_name].dp.buf()[n] = cnt_info.get_stat(c, i_alpha); + + } // end for i_alpha return; } //////////////////////////////////////////////////////////////////////// -void store_stat_sal1l2(int n, const ConcatString &col, - const SL1L2Info &s_info) { - double v; +void store_stat_partialsums(int n, STATLineType lt, + const ConcatString &col, + const SL1L2Info &s_info) { // Set the column name to all upper case ConcatString c = to_upper(col); - // Get the column value - if(c == "TOTAL") { v = (double) s_info.sacount; } - else if(c == "FABAR") { v = s_info.fabar; } - else if(c == "OABAR") { v = s_info.oabar; } - else if(c == "FOABAR") { v = s_info.foabar; } - else if(c == "FFABAR") { v = s_info.ffabar; } - else if(c == "OOABAR") { v = s_info.ooabar; } - else if(c == "MAE") { v = s_info.samae; } - else { - mlog << Error << "\nstore_stat_sal1l2() -> " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); + // Handle ALL columns + if(c == all_columns) { + if(lt == STATLineType::sl1l2) return store_stat_all_sl1l2(n, s_info); + else if(lt == STATLineType::sal1l2) return store_stat_all_sal1l2(n, s_info); } // Construct the NetCDF variable name - ConcatString 
var_name("series_sal1l2_"); - var_name << c; - - // Append threshold information, if supplied - if(s_info.fthresh.get_type() != thresh_na || - s_info.othresh.get_type() != thresh_na) { - var_name << "_fcst" << s_info.fthresh.get_abbr_str() - << "_" << setlogic_to_abbr(conf_info.cnt_logic) - << "_obs" << s_info.othresh.get_abbr_str(); - } + ConcatString var_name(build_nc_var_name_partialsums( + lt, c, s_info)); // Add map for this variable name if(stat_data.count(var_name) == 0) { // Build key - ConcatString lty_stat("SAL1L2_"); - lty_stat << c; + ConcatString lty_stat(statlinetype_to_string(lt)); + lty_stat << "_" << c; // Add new map entry - add_nc_var(var_name, c, stat_long_name[lty_stat], - s_info.fthresh.get_str(), - s_info.othresh.get_str(), - bad_data_double); + add_stat_data(var_name, c, stat_long_name[lty_stat], + s_info.fthresh.get_str(), + s_info.othresh.get_str(), + bad_data_double); } // Store the statistic value - put_nc_val(n, var_name, (float) v); + stat_data[var_name].dp.buf()[n] = s_info.get_stat(lt, c); return; } //////////////////////////////////////////////////////////////////////// -void store_stat_pct(int n, const ConcatString &col, - const PCTInfo &pct_info) { - int i = 0; - double v; - ConcatString lty_stat, var_name; +void store_stat_probabilistic(int n, STATLineType lt, + const ConcatString &col, + const PCTInfo &pct_info) { // Set the column name to all upper case ConcatString c = to_upper(col); - ConcatString d = c; - - // Get index value for variable column numbers - if(check_reg_exp("_[0-9]", c.c_str())) { - - // Parse the index value from the column name - i = atoi(strrchr(c.c_str(), '_') + 1) - 1; - - // Range check - if(i < 0 || i >= pct_info.pct.nrows()) { - mlog << Error << "\nstore_stat_pct() -> " - << "range check error for column name requested \"" << c - << "\"\n\n"; - exit(1); - } - } // end if - - // Get the column value - if(c == "TOTAL") { v = (double) pct_info.pct.n(); } - else if(c == "N_THRESH") { v = (double) pct_info.pct.nrows() + 1; } - else if(check_reg_exp("THRESH_[0-9]", c.c_str())) { v = pct_info.pct.threshold(i); } - else if(check_reg_exp("OY_[0-9]", c.c_str())) { v = (double) pct_info.pct.event_count_by_row(i); - d = "OY_I"; } - else if(check_reg_exp("ON_[0-9]", c.c_str())) { v = (double) pct_info.pct.nonevent_count_by_row(i); - d = "ON_I"; } - else { - mlog << Error << "\nstore_stat_pct() -> " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); - } - - // Construct the NetCDF variable name - var_name << cs_erase << "series_pct_" << c - << "_obs" << pct_info.othresh.get_abbr_str(); - - // Add map for this variable name - if(stat_data.count(var_name) == 0) { - - // Build key - lty_stat << "PCT_" << d; - // Add new map entry - add_nc_var(var_name, c, stat_long_name[lty_stat], - pct_info.fthresh.get_str(","), - pct_info.othresh.get_str(), - bad_data_double); + // Handle ALL PCT columns + if(lt == STATLineType::pct && c == all_columns) { + return store_stat_all_pct(n, pct_info); } - // Store the statistic value - put_nc_val(n, var_name, (float) v); - - return; -} - -//////////////////////////////////////////////////////////////////////// - -void store_stat_pstd(int n, const ConcatString &col, - const PCTInfo &pct_info) { - int i; - double v; - ConcatString lty_stat, var_name; - int n_ci = 1; + // Check for columns with normal or bootstrap confidence limits + int n_alpha = 1; + if(is_ci_stat_name(c)) n_alpha = pct_info.n_alpha; - // Set the column name to all upper case - ConcatString c = to_upper(col); + // Loop over 
the alpha values + for(int i_alpha=0; i_alpha " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); - } + // Store alpha value + double alpha = (n_alpha > 1 ? pct_info.alpha[i_alpha] : bad_data_double); // Construct the NetCDF variable name - var_name << cs_erase << "series_pstd_" << c; + ConcatString var_name(build_nc_var_name_probabilistic( + lt, c, pct_info, alpha)); - // Append confidence interval alpha value - if(n_ci > 1) var_name << "_a" << pct_info.alpha[i]; + // Store the data value + ConcatString col_name; + auto v = (float) pct_info.get_stat(lt, c, col_name, i_alpha); // Add map for this variable name if(stat_data.count(var_name) == 0) { // Build key - lty_stat << "PSTD_" << c; + ConcatString lty_stat(statlinetype_to_string(lt)); + lty_stat << "_" << col_name; // Add new map entry - add_nc_var(var_name, c, stat_long_name[lty_stat], - pct_info.fthresh.get_str(","), - pct_info.othresh.get_str(), - (n_ci > 1 ? pct_info.alpha[i] : bad_data_double)); + add_stat_data(var_name, c, stat_long_name[lty_stat], + pct_info.fthresh.get_str(","), + pct_info.othresh.get_str(), + alpha); } // Store the statistic value - put_nc_val(n, var_name, (float) v); + stat_data[var_name].dp.buf()[n] = v; - } // end for i + } // end for i_alpha return; } //////////////////////////////////////////////////////////////////////// -void store_stat_pjc(int n, const ConcatString &col, - const PCTInfo &pct_info) { - int i = 0; - int tot; - double v; - ConcatString lty_stat, var_name; +void store_stat_all_ctc(int n, const CTSInfo &cts_info) { + for(auto &col : ctc_columns) { + store_stat_categorical(n, STATLineType::ctc, col, cts_info); + } +} - // Set the column name to all upper case - ConcatString c = to_upper(col); - ConcatString d = c; +//////////////////////////////////////////////////////////////////////// - // Get index value for variable column numbers - if(check_reg_exp("_[0-9]", c.c_str())) { +void store_stat_all_mctc(int n, const MCTSInfo &mcts_info) { + StringArray mctc_cols(get_mctc_columns(mcts_info.cts.nrows())); + for(int i=0; i= pct_info.pct.nrows()) { - mlog << Error << "\nstore_stat_pjc() -> " - << "range check error for column name requested \"" << c - << "\"\n\n"; - exit(1); - } - } // end if - - // Store the total count - tot = pct_info.pct.n(); - - // Get the column value - if(c == "TOTAL") { v = (double) tot; } - else if(c == "N_THRESH") { v = (double) pct_info.pct.nrows() + 1; } - else if(check_reg_exp("THRESH_[0-9]", c.c_str())) { v = pct_info.pct.threshold(i); - d = "THRESH_I"; } - else if(check_reg_exp("OY_TP_[0-9]", c.c_str())) { v = pct_info.pct.event_count_by_row(i)/(double) tot; - d = "OY_TP_I"; } - else if(check_reg_exp("ON_TP_[0-9]", c.c_str())) { v = pct_info.pct.nonevent_count_by_row(i)/(double) tot; - d = "ON_TP_I"; } - else if(check_reg_exp("CALIBRATION_[0-9]", c.c_str())) { v = pct_info.pct.row_calibration(i); - d = "CALIBRATION_I"; } - else if(check_reg_exp("REFINEMENT_[0-9]", c.c_str())) { v = pct_info.pct.row_refinement(i); - d = "REFINEMENT_I"; } - else if(check_reg_exp("LIKELIHOOD_[0-9]", c.c_str())) { v = pct_info.pct.row_event_likelihood(i); - d = "LIKELIHOOD_I"; } - else if(check_reg_exp("BASER_[0-9]", c.c_str())) { v = pct_info.pct.row_obar(i); - d = "BASER_I"; } - else { - mlog << Error << "\nstore_stat_pjc() -> " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); +void store_stat_all_sl1l2(int n, const SL1L2Info &s_info) { + for(auto &col : sl1l2_columns) { + store_stat_partialsums(n, STATLineType::sl1l2, col, s_info); 
+ } +} + +//////////////////////////////////////////////////////////////////////// + +void store_stat_all_sal1l2(int n, const SL1L2Info &s_info) { + for(auto &col : sal1l2_columns) { + store_stat_partialsums(n, STATLineType::sal1l2, col, s_info); } +} - // Construct the NetCDF variable name - var_name << cs_erase << "series_pjc_" << c - << "_obs" << pct_info.othresh.get_abbr_str(); +//////////////////////////////////////////////////////////////////////// - // Add map for this variable name - if(stat_data.count(var_name) == 0) { +void store_stat_all_pct(int n, const PCTInfo &pct_info) { + StringArray pct_cols(get_pct_columns(pct_info.pct.nrows() + 1)); + for(int i=0; i= pct_info.pct.nrows()) { - mlog << Error << "\nstore_stat_prc() -> " - << "range check error for column name requested \"" << c - << "\"\n\n"; - exit(1); - } +//////////////////////////////////////////////////////////////////////// - // Get the 2x2 contingency table for this row - ct = pct_info.pct.ctc_by_row(i); +ConcatString build_nc_var_name_partialsums( + STATLineType lt, const ConcatString &col, + const SL1L2Info &s_info) { - } // end if + // Append the column name + ConcatString var_name("series_"); + var_name << to_lower(statlinetype_to_string(lt)) << "_" << col; - // Get the column value - if(c == "TOTAL") { v = (double) pct_info.pct.n(); } - else if(c == "N_THRESH") { v = (double) pct_info.pct.nrows() + 1; } - else if(check_reg_exp("THRESH_[0-9]", c.c_str())) { v = pct_info.pct.threshold(i); - d = "THRESH_I"; } - else if(check_reg_exp("PODY_[0-9]", c.c_str())) { v = ct.pod_yes(); - d = "PODY_I"; } - else if(check_reg_exp("POFD_[0-9]", c.c_str())) { v = ct.pofd(); - d = "POFD_I"; } - else { - mlog << Error << "\nstore_stat_prc() -> " - << "unsupported column name requested \"" << c - << "\"\n\n"; - exit(1); + // Append threshold information, if supplied + if(s_info.fthresh.get_type() != thresh_na || + s_info.othresh.get_type() != thresh_na) { + var_name << "_fcst" << s_info.fthresh.get_abbr_str() + << "_" << setlogic_to_abbr(s_info.logic) + << "_obs" << s_info.othresh.get_abbr_str(); } - // Add map for this variable name - if(stat_data.count(var_name) == 0) { + return var_name; +} - // Build key - lty_stat << "PRC_" << d; +//////////////////////////////////////////////////////////////////////// - // Add new map entry - add_nc_var(var_name, c, stat_long_name[lty_stat], - pct_info.fthresh.get_str(","), - pct_info.othresh.get_str(), - bad_data_double); +ConcatString build_nc_var_name_continuous( + STATLineType lt, const ConcatString &col, + const CNTInfo &cnt_info, double alpha) { + + // Append the column name + ConcatString var_name("series_"); + var_name << to_lower(statlinetype_to_string(lt)) << "_" << col; + + // Append threshold information, if supplied + if(cnt_info.fthresh.get_type() != thresh_na || + cnt_info.othresh.get_type() != thresh_na) { + var_name << "_fcst" << cnt_info.fthresh.get_abbr_str() + << "_" << setlogic_to_abbr(cnt_info.logic) + << "_obs" << cnt_info.othresh.get_abbr_str(); } - // Store the statistic value - put_nc_val(n, var_name, (float) v); + // Append confidence interval alpha value + if(!is_bad_data(alpha)) var_name << "_a" << alpha; - return; + return var_name; +} + +//////////////////////////////////////////////////////////////////////// + +ConcatString build_nc_var_name_probabilistic( + STATLineType lt, const ConcatString &col, + const PCTInfo &pct_info, double alpha) { + + // Append the column name + ConcatString var_name("series_"); + var_name << to_lower(statlinetype_to_string(lt)) << 
"_" << col; + + // Append the observation threshold + var_name << "_obs" << pct_info.othresh.get_abbr_str(); + + // Append confidence interval alpha value + if(!is_bad_data(alpha)) var_name << "_a" << alpha; + + return var_name; } //////////////////////////////////////////////////////////////////////// @@ -2200,9 +2204,10 @@ void setup_nc_file(const VarInfo *fcst_info, const VarInfo *obs_info) { if (deflate_level < 0) deflate_level = conf_info.get_compression_level(); // Add the series length variable - NcVar var = add_var(nc_out, "n_series", ncInt, deflate_level); + NcVar var = add_var(nc_out, n_series_var_name, ncInt, deflate_level); add_att(&var, "long_name", "length of series"); + int n_series = n_series_pair + n_series_aggr; if(!put_nc_data(&var, &n_series)) { mlog << Error << "\nsetup_nc_file() -> " << "error writing the series length variable.\n\n"; @@ -2217,68 +2222,77 @@ void setup_nc_file(const VarInfo *fcst_info, const VarInfo *obs_info) { //////////////////////////////////////////////////////////////////////// -void add_nc_var(const ConcatString &var_name, - const ConcatString &name, - const ConcatString &long_name, - const ConcatString &fcst_thresh, - const ConcatString &obs_thresh, - double alpha) { - NcVarData d; - - int deflate_level = compress_level; - if (deflate_level < 0) deflate_level = conf_info.get_compression_level(); - - // Add a new variable to the NetCDF file - NcVar var = add_var(nc_out, (string)var_name, ncFloat, lat_dim, lon_dim, deflate_level); - d.var = new NcVar(var); - - // Add variable attributes - add_att(d.var, "_FillValue", bad_data_float); - if(name.length() > 0) add_att(d.var, "name", (string)name); - if(long_name.length() > 0) add_att(d.var, "long_name", (string)long_name); - if(fcst_thresh.length() > 0) add_att(d.var, "fcst_thresh", (string)fcst_thresh); - if(obs_thresh.length() > 0) add_att(d.var, "obs_thresh", (string)obs_thresh); - if(!is_bad_data(alpha)) add_att(d.var, "alpha", alpha); - - // Store the new NcVarData object in the map - stat_data[var_name] = d; +void add_stat_data(const ConcatString &var_name, + const ConcatString &name, + const ConcatString &long_name, + const ConcatString &fcst_thresh, + const ConcatString &obs_thresh, + double alpha) { + + NcVarData data; + data.dp.set_size(grid.nx(), grid.ny(), bad_data_double); + data.name = name; + data.long_name = long_name; + data.fcst_thresh = fcst_thresh; + data.obs_thresh = obs_thresh; + data.alpha = alpha; + + // Store the new NcVarData object + stat_data[var_name] = data; + stat_data_keys.push_back(var_name); return; } //////////////////////////////////////////////////////////////////////// -void put_nc_val(int n, const ConcatString &var_name, float v) { - int x, y; +void write_stat_data() { - // Determine x,y location - DefaultTO.one_to_two(grid.nx(), grid.ny(), n, x, y); + mlog << Debug(2) + << "Writing " << stat_data_keys.size() + << " output variables.\n"; - // Check for key in the map - if(stat_data.count(var_name) == 0) { - mlog << Error << "\nput_nc_val() -> " - << "variable name \"" << var_name - << "\" does not exist in the map.\n\n"; - exit(1); + int deflate_level = compress_level; + if(deflate_level < 0) deflate_level = conf_info.get_compression_level(); + + // Allocate memory to store data values for each grid point + float *data = new float [grid.nx()*grid.ny()]; + + // Write output for each stat_data map entry + for(auto &key : stat_data_keys) { + + NcVarData *ptr = &stat_data[key]; + + // Add a new variable to the NetCDF file + NcVar nc_var = add_var(nc_out, key, 
ncFloat, lat_dim, lon_dim, deflate_level); + + // Add variable attributes + add_att(&nc_var, "_FillValue", bad_data_float); + add_att(&nc_var, "name", ptr->name); + add_att(&nc_var, "long_name", ptr->long_name); + if(ptr->fcst_thresh.length() > 0) add_att(&nc_var, "fcst_thresh", ptr->fcst_thresh); + if(ptr->obs_thresh.length() > 0) add_att(&nc_var, "obs_thresh", ptr->obs_thresh); + if(!is_bad_data(ptr->alpha)) add_att(&nc_var, "alpha", ptr->alpha); + + // Store the data + for(int x=0; x<grid.nx(); x++) { + for(int y=0; y<grid.ny(); y++) { + int n = DefaultTO.two_to_one(grid.nx(), grid.ny(), x, y); + data[n] = ptr->dp(x, y); + } // end for y + } // end for x + + // Write out the data + if(!put_nc_data_with_dims(&nc_var, &data[0], grid.ny(), grid.nx())) { + mlog << Error << "\nwrite_stat_data() -> " + << R"(error writing ")" << key + << R"(" data to the output file.)" << "\n\n"; + exit(1); + } } - // Get the NetCDF variable to be written - NcVar *var = stat_data[var_name].var; - - long offsets[2]; - long lengths[2]; - offsets[0] = y; - offsets[1] = x; - lengths[0] = 1; - lengths[1] = 1; - - // Store the current value - if(!put_nc_data(var, &v, lengths, offsets)) { - mlog << Error << "\nput_nc_val() -> " - << "error writing to variable " << var_name - << " for point (" << x << ", " << y << ").\n\n"; - exit(1); - } + // Clean up + if(data) { delete [] data; data = (float *) nullptr; } return; } @@ -2311,25 +2325,23 @@ void set_range(const int &t, int &beg, int &end) { void clean_up() { - // Deallocate NetCDF variable for each map entry - map<ConcatString,NcVarData>::const_iterator it; - for(it=stat_data.begin(); it!=stat_data.end(); it++) { - if(it->second.var) { delete it->second.var; } - } - // Close the output NetCDF file if(nc_out) { // List the NetCDF file after it is finished - mlog << Debug(1) << "Output file: " << out_file << "\n"; + mlog << Debug(1) + << "Output file: " << out_file << "\n"; delete nc_out; nc_out = (NcFile *) nullptr; } + // Close the aggregate NetCDF file + if(aggr_nc.MetNc) aggr_nc.close(); + // Deallocate memory for data files - if(fcst_mtddf) { delete fcst_mtddf; fcst_mtddf = (Met2dDataFile *) nullptr; } - if(obs_mtddf) { delete obs_mtddf; obs_mtddf = (Met2dDataFile *) nullptr; } + if(fcst_mtddf) { delete fcst_mtddf; fcst_mtddf = nullptr; } + if(obs_mtddf) { delete obs_mtddf; obs_mtddf = nullptr; } // Deallocate memory for the random number generator rng_free(rng_ptr); @@ -2348,6 +2360,7 @@ void usage() { << "\t-fcst file_1 ... file_n | fcst_file_list\n" << "\t-obs file_1 ... file_n | obs_file_list\n" << "\t[-both file_1 ... file_n | both_file_list]\n" + << "\t[-aggr file]\n" << "\t[-paired]\n" << "\t-out file\n" << "\t-config file\n" @@ -2355,37 +2368,53 @@ << "\t[-v level]\n" << "\t[-compress level]\n\n" - << "\twhere\t\"-fcst file_1 ... file_n\" are the gridded " + << "\twhere\t" + << R"("-fcst file_1 ... file_n" are the gridded )" << "forecast files to be used (required).\n" - << "\t\t\"-fcst fcst_file_list\" is an ASCII file containing " + << "\t\t" + << R"("-fcst fcst_file_list" is an ASCII file containing )" << "a list of gridded forecast files to be used (required).\n" - << "\t\t\"-obs file_1 ... file_n\" are the gridded " + << "\t\t" + << R"("-obs file_1 ...
file_n" are the gridded )" << "observation files to be used (required).\n" - << "\t\t\"-obs obs_file_list\" is an ASCII file containing " + << "\t\t" + << R"("-obs obs_file_list" is an ASCII file containing )" << "a list of gridded observation files to be used (required).\n" - << "\t\t\"-both\" sets the \"-fcst\" and \"-obs\" options to " + << "\t\t" + << R"("-both" sets the "-fcst" and "-obs" options to )" << "the same set of files (optional).\n" - << "\t\t\"-paired\" to indicate that the input -fcst and -obs " + << "\t\t" + << R"("-aggr file" specifies a series_analysis output )" + << "file with partial sums and/or contingency table counts to be " + << "updated prior to deriving statistics (optional).\n" + + << "\t\t" + << R"("-paired" to indicate that the input -fcst and -obs )" << "file lists are already paired (optional).\n" - << "\t\t\"-out file\" is the NetCDF output file containing " + << "\t\t" + << R"("-out file" is the NetCDF output file containing )" << "computed statistics (required).\n" - << "\t\t\"-config file\" is a SeriesAnalysisConfig file " + << "\t\t" + << R"("-config file" is a SeriesAnalysisConfig file )" << "containing the desired configuration settings (required).\n" - << "\t\t\"-log file\" outputs log messages to the specified " + << "\t\t" + << R"("-log file" outputs log messages to the specified )" << "file (optional).\n" - << "\t\t\"-v level\" overrides the default level of logging (" + << "\t\t" + << R"("-v level" overrides the default level of logging ()" << mlog.verbosity_level() << ") (optional).\n" - << "\t\t\"-compress level\" overrides the compression level of NetCDF variable (" + << "\t\t" + << R"("-compress level" overrides the compression level of NetCDF variable ()" << conf_info.get_compression_level() << ") (optional).\n\n" << flush; exit(1); @@ -2412,6 +2441,12 @@ void set_both_files(const StringArray & a) { //////////////////////////////////////////////////////////////////////// +void set_aggr(const StringArray & a) { + aggr_file = a[0]; +} + +//////////////////////////////////////////////////////////////////////// + void set_paired(const StringArray & a) { paired = true; } @@ -2449,8 +2484,8 @@ void parse_long_names() { f_in.open(file_name.c_str()); if(!f_in) { mlog << Error << "\nparse_long_names() -> " - << "can't open the ASCII file \"" << file_name - << "\" for reading\n\n"; + << R"(can't open the ASCII file ") << file_name + << R"(" for reading!)" << "\n\n"; exit(1); } diff --git a/src/tools/core/series_analysis/series_analysis.h b/src/tools/core/series_analysis/series_analysis.h index 2540540015..73b2f3d6f6 100644 --- a/src/tools/core/series_analysis/series_analysis.h +++ b/src/tools/core/series_analysis/series_analysis.h @@ -17,7 +17,6 @@ // 000 12/10/12 Halley Gotway New // 001 09/28/22 Prestopnik MET #2227 Remove namespace std and netCDF from header files // -// //////////////////////////////////////////////////////////////////////// #ifndef __SERIES_ANALYSIS_H__ @@ -43,6 +42,7 @@ #include "series_analysis_conf_info.h" #include "vx_data2d_factory.h" +#include "vx_data2d_nc_met.h" #include "vx_grid.h" #include "vx_util.h" #include "vx_stat_out.h" @@ -60,6 +60,11 @@ static const char * program_name = "series_analysis"; static const char * default_config_filename = "MET_BASE/config/SeriesAnalysisConfig_default"; +static const char * all_columns = "ALL"; +static const char * n_series_var_name = "n_series"; + +static const char * total_name = "TOTAL"; + //////////////////////////////////////////////////////////////////////// // // Variables 
for Command Line Arguments @@ -68,10 +73,11 @@ static const char * default_config_filename = // Input files static StringArray fcst_files, found_fcst_files; -static StringArray obs_files, found_obs_files; -static GrdFileType ftype = FileType_None; -static GrdFileType otype = FileType_None; -static bool paired = false; +static StringArray obs_files, found_obs_files; +static GrdFileType ftype = FileType_None; +static GrdFileType otype = FileType_None; +static ConcatString aggr_file; +static bool paired = false; static int compress_level = -1; // Output file @@ -88,17 +94,26 @@ static SeriesAnalysisConfInfo conf_info; //////////////////////////////////////////////////////////////////////// // Output NetCDF file -static netCDF::NcFile *nc_out = (netCDF::NcFile *) nullptr; -static netCDF::NcDim lat_dim; -static netCDF::NcDim lon_dim ; +static netCDF::NcFile *nc_out = nullptr; +static netCDF::NcDim lat_dim; +static netCDF::NcDim lon_dim ; -// Structure to store computed statistics and corresponding metadata +// Structure to store computed statistics struct NcVarData { - netCDF::NcVar * var; // Pointer to NetCDF variable + DataPlane dp; + std::string name; + std::string long_name; + std::string fcst_thresh; + std::string obs_thresh; + double alpha; }; // Mapping of NetCDF variable name to computed statistic -std::map<ConcatString,NcVarData> stat_data; +std::map<ConcatString,NcVarData> stat_data; +std::vector<ConcatString> stat_data_keys; + +// Mapping of aggregate NetCDF variable name to DataPlane +std::map<ConcatString,DataPlane> aggr_data; //////////////////////////////////////////////////////////////////////// // @@ -108,16 +123,16 @@ std::map<ConcatString,NcVarData> stat_data; // Grid variables static Grid grid; -static int nxy = 0; static int n_reads = 1; // Initialize to at least one pass // Data file factory and input files static Met2dDataFileFactory mtddf_factory; -static Met2dDataFile *fcst_mtddf = (Met2dDataFile *) nullptr; -static Met2dDataFile *obs_mtddf = (Met2dDataFile *) nullptr; +static Met2dDataFile *fcst_mtddf = nullptr; +static Met2dDataFile *obs_mtddf = nullptr; +static MetNcMetDataFile aggr_nc; // Pointer to the random number generator to be used -static gsl_rng *rng_ptr = (gsl_rng *) nullptr; +static gsl_rng *rng_ptr = nullptr; // Enumeration of ways that a series can be defined enum class SeriesType { @@ -130,7 +145,8 @@ static SeriesType series_type = SeriesType::None; // Series length -static int n_series = 0; +static int n_series_pair = 0; // Input pair data series +static int n_series_aggr = 0; // Input aggr series // Range of timing values encountered in the data static unixtime fcst_init_beg = (unixtime) 0; diff --git a/src/tools/core/stat_analysis/aggr_stat_line.cc b/src/tools/core/stat_analysis/aggr_stat_line.cc index 441cbe5e07..6c0a7add52 100644 --- a/src/tools/core/stat_analysis/aggr_stat_line.cc +++ b/src/tools/core/stat_analysis/aggr_stat_line.cc @@ -706,7 +706,7 @@ void aggr_summary_lines(LineDataFile &f, STATAnalysisJob &job, else if(do_cnt && (line.type() == STATLineType::sl1l2 || line.type() == STATLineType::sal1l2)) { parse_sl1l2_line(line, sl1l2_info); - compute_cntinfo(sl1l2_info, 0, cnt_info); + compute_cntinfo(sl1l2_info, cnt_info); } // @@ -731,7 +731,7 @@ void aggr_summary_lines(LineDataFile &f, STATAnalysisJob &job, // if((line.type() == STATLineType::fho || line.type() == STATLineType::ctc) && lty == STATLineType::cts) { - v = cts_info.get_stat(req_col[i].c_str()); + v = cts_info.get_stat_cts(req_col[i].c_str()); w = cts_info.cts.n(); } else if(line.type() == STATLineType::sl1l2 && lty == STATLineType::cnt) { @@ -743,7 +743,7 @@
void aggr_summary_lines(LineDataFile &f, STATAnalysisJob &job, w = cnt_info.n; } else if(line.type() == STATLineType::vl1l2 && lty == STATLineType::vcnt) { - v = vl1l2_info.get_stat(req_col[i].c_str()); + v = vl1l2_info.get_stat_vcnt(req_col[i].c_str()); w = (is_vector_dir_stat(line.type(), req_col[i].c_str()) ? vl1l2_info.dcount : vl1l2_info.vcount); @@ -1519,7 +1519,7 @@ void aggr_psum_lines(LineDataFile &f, STATAnalysisJob &job, // // Compute the stats for the current time // - compute_cntinfo(cur_sl1l2, 0, cur_cnt); + compute_cntinfo(cur_sl1l2, cur_cnt); // // Append the stats diff --git a/src/tools/core/stat_analysis/skill_score_index_job.cc b/src/tools/core/stat_analysis/skill_score_index_job.cc index 9651e50a0d..88bc450900 100644 --- a/src/tools/core/stat_analysis/skill_score_index_job.cc +++ b/src/tools/core/stat_analysis/skill_score_index_job.cc @@ -245,17 +245,17 @@ SSIDXData SSIndexJobInfo::compute_ss_index() { // Continuous stats if(job_lt[i] == STATLineType::sl1l2) { - compute_cntinfo(fcst_sl1l2[i], 0, fcst_cnt); + compute_cntinfo(fcst_sl1l2[i], fcst_cnt); fcst_stat = fcst_cnt.get_stat(fcst_job[i].column[0].c_str()); - compute_cntinfo(ref_sl1l2[i], 0, ref_cnt); + compute_cntinfo(ref_sl1l2[i], ref_cnt); ref_stat = ref_cnt.get_stat(fcst_job[i].column[0].c_str()); } // Categorical stats else if(job_lt[i] == STATLineType::ctc) { fcst_cts[i].compute_stats(); - fcst_stat = fcst_cts[i].get_stat(fcst_job[i].column[0].c_str()); + fcst_stat = fcst_cts[i].get_stat_cts(fcst_job[i].column[0].c_str()); ref_cts[i].compute_stats(); - ref_stat = ref_cts[i].get_stat(fcst_job[i].column[0].c_str()); + ref_stat = ref_cts[i].get_stat_cts(fcst_job[i].column[0].c_str()); } else { mlog << Error << "\nSSIndexJobInfo::compute_ss_index() -> " diff --git a/src/tools/core/stat_analysis/stat_analysis_job.cc b/src/tools/core/stat_analysis/stat_analysis_job.cc index 3492309da1..b3a9eb12cb 100644 --- a/src/tools/core/stat_analysis/stat_analysis_job.cc +++ b/src/tools/core/stat_analysis/stat_analysis_job.cc @@ -1876,10 +1876,7 @@ void write_job_aggr_psum(STATAnalysisJob &job, STATLineType lt, // // Compute CNTInfo statistics from the aggregated partial sums // - if(it->second.sl1l2_info.scount > 0) - compute_cntinfo(it->second.sl1l2_info, 0, it->second.cnt_info); - else - compute_cntinfo(it->second.sl1l2_info, 1, it->second.cnt_info); + compute_cntinfo(it->second.sl1l2_info, it->second.cnt_info); if(job.stat_out) { write_cnt_cols(it->second.cnt_info, 0, job.stat_at, @@ -2610,7 +2607,7 @@ void write_job_aggr_ssvar(STATAnalysisJob &job, STATLineType lt, // // Compute CNTInfo statistics from the aggregated partial sums // - compute_cntinfo(bin_it->second.sl1l2_info, 0, cnt_info); + compute_cntinfo(bin_it->second.sl1l2_info, cnt_info); // // Write the output STAT line diff --git a/src/tools/other/pb2nc/pb2nc.cc b/src/tools/other/pb2nc/pb2nc.cc index a9abdf1be9..94a7750bad 100644 --- a/src/tools/other/pb2nc/pb2nc.cc +++ b/src/tools/other/pb2nc/pb2nc.cc @@ -58,9 +58,10 @@ // from header files // 019 07/21/23 Prestopnik, J. 
MET #2615 Add #include to compile // successfully using gcc12 +// 020 08/26/24 Halley Gotway MET #2938 Silence center time warnings +// //////////////////////////////////////////////////////////////////////// - #include #include #include @@ -467,7 +468,7 @@ int met_main(int argc, char *argv[]) { if (collect_metadata) { // Process each PrepBufr file - for(int i=0; i 1) { + if(pbfile.n() > 1) { mlog << Error << "\n" << method_name << "the \"-dump\" and \"-pbfile\" options may not be " << "used together. Only one Bufr file may be dump " @@ -1067,15 +1067,14 @@ void process_pbfile(int i_pb) { cape_h = pbl_h = 0; cape_p = pbl_p = bad_data_float; - diff_file_time_count = 0; cycle_minute = missing_cycle_minute; // initialize - // Derive quantities which can be derived from - // P, Q, T, Z, U, V + // Check the number of variables to be derived from: + // P, Q, T, Z, U, V if (n_derive_gc > bufr_derive_cfgs.size()) { - mlog << Debug(3) << "\n" << method_name - << "Skip the derived variables because of not requested (" - << bufr_derive_cfgs.size() << ").\n\n"; + mlog << Debug(3) << method_name + << "No observation variables requested to be derived (" + << bufr_derive_cfgs.size() << ").\n"; } for (int idx=0; idx= debug_level_for_performance) { end_t = clock(); - cout << (end_t-start_t)/double(CLOCKS_PER_SEC) - << " seconds\n"; + cout << (end_t-start_t)/double(CLOCKS_PER_SEC) << " seconds\n"; start_t = clock(); } } @@ -1149,10 +1147,10 @@ void process_pbfile(int i_pb) { << " to " << end_time_str << "\n"; } - else if(file_ut != msg_ut) { - diff_file_time_count++; - if (!diff_file_times.has(msg_ut)) diff_file_times.add(msg_ut); - } + + // Keep track of the unique message reference times, + // searching from newest to oldest + unique_msg_ut.add_uniq(msg_ut, false); // Add minutes by calling IUPVS01(unit, "MINU") if (cycle_minute != missing_cycle_minute) { @@ -1802,7 +1800,7 @@ void process_pbfile(int i_pb) { int n_other_file_obs = 0; int n_other_total_obs = 0; int n_other_hdr_obs = 0; - int var_count = bufr_obs_name_arr.n_elements(); + int var_count = bufr_obs_name_arr.n(); for (int vIdx=0; vIdx= debug_level_for_performance) { end_t = clock(); log_message << (end_t-start_t)/double(CLOCKS_PER_SEC) << " seconds"; - //start_t = clock(); } cout << log_message << "\n"; } - if(0 < diff_file_time_count && 0 < diff_file_times.n_elements()) { - mlog << Warning << "\n" << method_name - << "The observation time should remain the same for " - << "all " << (is_prepbufr ? "PrepBufr" : "Bufr") << " messages\n"; - mlog << Warning << method_name << " " - << diff_file_time_count << " messages with different reference time (" - << unix_to_yyyymmdd_hhmmss(file_ut) << "):\n"; - for (int idx=0; idx 1) { + + ConcatString msg_cs; + msg_cs << "Found " << unique_msg_ut.n() << " unique " + << (is_prepbufr ? 
"PrepBufr" : "Bufr") + << " message reference time(s) from " + << unix_to_yyyymmdd_hhmmss(unique_msg_ut.min()) << " to " + << unix_to_yyyymmdd_hhmmss(unique_msg_ut.max()) << ".\n"; + + // Print warning if the time window was not defined on the command line + if(valid_beg_ut == (unixtime) 0 && + valid_end_ut == (unixtime) 0) { + mlog << Warning << "\n" << method_name + << msg_cs << "\n" + << R"(Set the "-valid_beg" and/or "-valid_end" )" + << "command line options to define the retention " + << "time window.\n\n"; + } + else { + mlog << Debug(3) << msg_cs; } - mlog << Warning << "\n"; } nc_point_obs.write_observation(); - if(mlog.verbosity_level() > 0) cout << "\n" << flush; - mlog << Debug(2) << "Messages processed\t\t\t= " << npbmsg << "\n" << "Rejected based on message type\t\t= " @@ -2065,7 +2070,7 @@ void process_pbfile(int i_pb) { int debug_level = 5; if(mlog.verbosity_level() >= debug_level) { log_message = "Filtered time:"; - for (kk=0; kk variable \"" @@ -2388,9 +2393,9 @@ void process_pbfile_metadata(int i_pb) { } } // if (0 == i_read) - if (0 == unchecked_var_list.n_elements()) break; + if (0 == unchecked_var_list.n()) break; - int var_count = unchecked_var_list.n_elements(); + int var_count = unchecked_var_list.n(); for (int vIdx=var_count-1; vIdx>=0; vIdx--) { int nlev2, count; bool has_valid_data; @@ -2441,7 +2446,7 @@ void process_pbfile_metadata(int i_pb) { bool has_prepbufr_vars = false; const char * tmp_var_name; bufr_obs_name_arr.clear(); - for (index=0; index obs_var_map = conf_info.getObsVarMap(); - for(int i=0; i= 0 && (do_all_vars || code < bufr_target_variables.n_elements())); + return(code >= 0 && (do_all_vars || code < bufr_target_variables.n())); } //////////////////////////////////////////////////////////////////////// bool keep_level_category(int category) { - return(conf_info.level_category.n_elements() == 0 || + return(conf_info.level_category.n() == 0 || conf_info.level_category.has(category, false)); } @@ -2884,8 +2889,8 @@ void display_bufr_variables(const StringArray &all_vars, const StringArray &all_ ConcatString description; ConcatString line_buf; - mlog << Debug(1) << "\n Header variables (" << hdr_arr.n_elements() << ") :\n"; - for(i=0; i pqtzuv_map_tq, log_array.add(buf.c_str()); } offset = 0; - for (int idx=log_array.n_elements()-1; idx>=0; idx--) { + for (int idx=log_array.n()-1; idx>=0; idx--) { mlog << Debug(PBL_DEBUG_LEVEL) << method_name << "TQZ record: " << offset++ << "\t" << log_array[idx] << "\n"; } @@ -3463,7 +3468,7 @@ void log_tqz_and_uv(map pqtzuv_map_tq, log_array.add(buf.c_str()); } offset = 0; - for (int idx=log_array.n_elements()-1; idx>=0; idx--) { + for (int idx=log_array.n()-1; idx>=0; idx--) { mlog << Debug(PBL_DEBUG_LEVEL) << method_name << " UV record: " << offset++ << "\t" << log_array[idx] << "\n"; } @@ -3496,7 +3501,7 @@ void log_merged_tqz_uv(map pqtzuv_map_tq, log_array.add(buf.c_str()); } int offset = 0; - for (int idx=log_array.n_elements()-1; idx>=0; idx--) { + for (int idx=log_array.n()-1; idx>=0; idx--) { mlog << Debug(PBL_DEBUG_LEVEL) << method_name << " merged: " << offset++ << "\t" << log_array[idx] << "\n"; } @@ -3523,7 +3528,7 @@ void log_pbl_input(int pbl_level, const char *method_name) { int offset = 0; mlog << Debug(PBL_DEBUG_LEVEL) << method_name << "input to calpbl_ (buffer): index, P, Q, T, Z, U, V\n"; - for (int idx=log_array.n_elements()-1; idx>=0; idx--) { + for (int idx=log_array.n()-1; idx>=0; idx--) { mlog << Debug(PBL_DEBUG_LEVEL) << method_name << " " << offset++ << "\t" << log_array[idx] << "\n"; }