diff --git a/met/configure.ac b/met/configure.ac index 2fc599e47a..7008748079 100644 --- a/met/configure.ac +++ b/met/configure.ac @@ -8,6 +8,13 @@ AC_CONFIG_HEADERS([config.h]) AM_INIT_AUTOMAKE([1.9 foreign]) +# OpenMP + +AC_OPENMP() + +CPPFLAGS="${CPPFLAGS} ${OPENMP_CFLAGS}" +LDFLAGS="${LDFLAGS} ${OPENMP_CFLAGS}" + # # Look for the NetCDF library # diff --git a/met/docs/Users_Guide/config_options.rst b/met/docs/Users_Guide/config_options.rst index e6d2349cc4..b56cb65d75 100644 --- a/met/docs/Users_Guide/config_options.rst +++ b/met/docs/Users_Guide/config_options.rst @@ -202,6 +202,12 @@ to use one configuration file rather than maintianing many very similar ones. An error in the syntax of a configuration file will result in an error from the MET tool stating the location of the parsing error. +Runtime Environment Variables +----------------------------- + +MET_BASE +^^^^^^^^ + The MET_BASE variable is defined in the code at compilation time as the path to the MET shared data. These are things like the default configuration files, common polygons and color scales. MET_BASE may be used in the MET configuration @@ -209,6 +215,9 @@ files when specifying paths and the appropriate path will be substituted in. If MET_BASE is defined as an environment variable, its value will be used instead of the one defined at compilation time. +MET_OBS_ERROR_TABLE +^^^^^^^^^^^^^^^^^^^ + The MET_OBS_ERROR_TABLE environment variable can be set to specify the location of an ASCII file defining observation error information. The default table can be found in the installed *share/met/table_files/obs_error_table.txt*. This @@ -236,6 +245,9 @@ values and randomly perturbed ensemble member values. Values less than MIN are reset to the mimimum value and values greater than MAX are reset to the maximum value. A value of NA indicates that the variable is unbounded. +MET_GRIB_TABLES +^^^^^^^^^^^^^^^ + The MET_GRIB_TABLES environment variable can be set to specify the location of custom GRIB tables. It can either be set to a specific file name or to a directory containing custom GRIB tables files. These file names must begin with @@ -289,9 +301,96 @@ References: | `NCEP WMO GRIB2 Documentation `_ | +OMP_NUM_THREADS +^^^^^^^^^^^^^^^ + +**Introduction** + +There are a number of different ways of parallelizing code. OpenMP offers +parallelism within a single shared-memory workstation or supercomputer node. +The programmer writes OpenMP directives into the code to parallelize +particular code regions. + +When a parallelized code region is reached, which we shall hereafter call a +parallel region, a number of threads are spawned and work is shared among them. +Running on different cores, this reduces the execution time. At the end of the +parallel region, the code returns to single-thread execution. + +A limited number of code regions are parallelized in MET. As a consequence, +there are limits to the overall speed gains acheivable. Only the parallel +regions of code will get faster with more threads, leaving the remaining +serial portions to dominate the runtime. + +Not all top-level executables use parallelized code. If OpenMP is available, +a log message will appear inviting the user to increase the number of threads +for faster runtimes. + +**Setting the number of threads** + +The number of threads is controlled by the environment variable +*OMP_NUM_THREADS* . For example, on a quad core machine, the user might choose +to run on 4 threads: + +.. code :: bash + + export OMP_NUM_THREADS=4 + +Alternatively, the variable may be specified as a prefix to the executable +itself. For example: + +.. code :: bash + + OMP_NUM_THREADS=4 + +The case where this variable remains unset is handled inside the code, which +defaults to a single thread. + +There are choices when deciding how many threads to use. To perform a single run +as fast as possible, it would likely be appropriate to use as many threads as +there are (physical) cores available on the specific system. However, it is not +a cast-iron guarantee that more threads will always translate into more speed. +In theory, there is a chance that running across multiple non-uniform memory +access (NUMA) regions may carry negative performance impacts. This has not been +observed in practice, however. + +A lower thread count is appropriate when time-to-solution is not so critical, +because cores remain idle when the code is not inside a parallel region. Fewer +threads typically means better resource utilization. + +**Which code is parallelized?** + +Regions of parallelized code are: + + * :code:`fractional_coverage (data_plane_util.cc)` + +Only the following top-level executables can presently benefit from OpenMP +parallelization: + + * :code:`grid_stat` + * :code:`ensemble_stat` + * :code:`grid_ens_prod` + +**Thread Binding** + +It is normally beneficial to bind threads to particular cores, sometimes called +*affinitization*. There are a few reasons for this, but at the very least it +guarantees that threads remain evenly distributed across the available cores. +Otherwise, the operating system may migrate threads between cores during a run. + +OpenMP provides some environment variables to handle this: :code:`OMP_PLACES` +and :code:`OMP_PROC_BIND`. We anticipate that the effect of setting only +:code:`OMP_PROC_BIND=true` would be neutral-to-positive. + +However, there are sometimes compiler-specific environment variables. Instead, +thread affinitization is sometimes handled by MPI launchers, since OpenMP is +often used in MPI codes to reduce intra-node communications. + +Where code is running in a production context, it is worth being familiar with +the binding / affinitization method on the particular system and building it +into any relevant scripting. Settings common to multiple tools -_________________________________ +--------------------------------- .. _exit_on_warning: @@ -2190,10 +2289,10 @@ are empty. Note: grib_code 11 is equivalent to obs_var "TMP". } Settings specific to individual tools -_____________________________________ +------------------------------------- EnsembleStatConfig_default -~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^ .. _ens: @@ -2505,7 +2604,7 @@ used for random assignment of ranks when they are tied. } MODEAnalysisConfig_default -~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^ MODE line options are used to create filters that determine which MODE output lines are read in and processed. The MODE line options are numerous. They @@ -2843,7 +2942,7 @@ MET User's Guide for a description of these attributes. MODEConfig_default -~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^ .. _quilt: @@ -3158,7 +3257,7 @@ much more flexible "regrid" option may be used instead. shift_right = 0; PB2NCConfig_default -~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^ The PB2NC tool filters out observations from PREPBUFR or BUFR files using the following criteria: @@ -3484,7 +3583,7 @@ stack (most quality controlled) or the bottom of the event stack (most raw). event_stack_flag = TOP; SeriesAnalysisConfig_default -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. _block_size: @@ -3539,7 +3638,7 @@ grid is large. } STATAnalysisConfig_default -~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^ .. _jobs: @@ -4008,7 +4107,7 @@ confidence intervals computed for the aggregated statistics. vif_flag = FALSE; WaveletStatConfig_default -~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^ .. _grid_decomp_flag: @@ -4099,7 +4198,7 @@ similar to the "fcst_raw_plot" described in the "Settings common to multiple tools" section. WWMCARegridConfig_default -~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^ .. _to_grid: diff --git a/met/docs/Users_Guide/installation.rst b/met/docs/Users_Guide/installation.rst index 963545ac2f..b4459fed0d 100644 --- a/met/docs/Users_Guide/installation.rst +++ b/met/docs/Users_Guide/installation.rst @@ -290,6 +290,14 @@ Enable compilation of the MODE-Graphics tool. Requires $MET_CAIRO and $MET_FREET Disable use of BLOCK4 in the compilation. Use this if you have trouble using PrepBUFR files. +.. code-block:: none + + --disable-openmp + +Disable compilation of OpenMP directives within the code which allows some code +regions to benefit from thread-parallel execution. Runtime environment variable +:code:`OMP_NUM_THREADS` controls the number of threads. + Run the configure script with the **-help** argument to see the full list of configuration options. Make Targets diff --git a/met/src/basic/vx_util/Makefile.am b/met/src/basic/vx_util/Makefile.am index 57b7a100db..ae466f0963 100644 --- a/met/src/basic/vx_util/Makefile.am +++ b/met/src/basic/vx_util/Makefile.am @@ -69,6 +69,7 @@ libvx_util_a_SOURCES = ascii_table.cc ascii_table.h \ GridOffset.h GridOffset.cc \ observation.h observation.cc \ stat_column_defs.h \ + handle_openmp.h handle_openmp.cc \ RectangularTemplate.h RectangularTemplate.cc $(OPT_PYTHON_SOURCES) libvx_util_a_CPPFLAGS = ${MET_CPPFLAGS} diff --git a/met/src/basic/vx_util/data_plane_util.cc b/met/src/basic/vx_util/data_plane_util.cc index 3f5b5f7278..39fbb67679 100644 --- a/met/src/basic/vx_util/data_plane_util.cc +++ b/met/src/basic/vx_util/data_plane_util.cc @@ -16,6 +16,10 @@ using namespace std; #include #include +#ifdef _OPENMP + #include "omp.h" +#endif + #include "data_plane_util.h" #include "interp_util.h" #include "two_to_one.h" @@ -247,83 +251,95 @@ void fractional_coverage(const DataPlane &dp, DataPlane &frac_dp, } } - // Build the grid template - GridTemplateFactory gtf; - GridTemplate* gt = gtf.buildGT(shape, width, wrap_lon); - - mlog << Debug(3) - << "Computing fractional coverage field using the " - << t.get_str() << " threshold and the " - << interpmthd_to_string(InterpMthd_Nbrhd) << "(" << gt->size() - << ") " << gt->getClassName() << " interpolation method.\n"; - - // Initialize the fractional coverage field - frac_dp = dp; - frac_dp.set_constant(bad_data_double); - - // Compute the fractional coverage meeting the threshold criteria - for(x=0; xgetFirstInGrid(x, y, dp.nx(), dp.ny()); - gp != NULL; - gp = gt->getNextInGrid()) { - if(is_bad_data(v = dp.get(gp->x, gp->y))) continue; - n_vld++; - if(t.check(v, - (use_climo ? cmn->get(gp->x, gp->y) : bad), - (use_climo ? csd->get(gp->x, gp->y) : bad))) n_thr++; - } - } - // Subtract off the bottom edge, shift up, and add the top. - else { - - // Subtract points from the the bottom edge - for(gp = gt->getFirstInBotEdge(); - gp != NULL; - gp = gt->getNextInBotEdge()) { - if(is_bad_data(v = dp.get(gp->x, gp->y))) continue; - n_vld--; - if(t.check(v, - (use_climo ? cmn->get(gp->x, gp->y) : bad), - (use_climo ? csd->get(gp->x, gp->y) : bad))) n_thr--; - } - - // Increment Y - gt->incBaseY(1); - - // Add points from the the top edge - for(gp = gt->getFirstInTopEdge(); - gp != NULL; - gp = gt->getNextInTopEdge()) { - if(is_bad_data(v = dp.get(gp->x, gp->y))) continue; - n_vld++; - if(t.check(v, - (use_climo ? cmn->get(gp->x, gp->y) : bad), - (use_climo ? csd->get(gp->x, gp->y) : bad))) n_thr++; - } - } - - // Check for enough valid data and compute fractional coverage - if((double)(n_vld)/gt->size() >= vld_t && n_vld != 0) { - frac_dp.set((double) n_thr/n_vld, x, y); - } - - } // end for y - - // Increment X - if(x < (dp.nx() - 1)) gt->incBaseX(1); - - } // end for x - - delete gt; +#pragma omp parallel default(none) \ + shared(mlog, dp, frac_dp, width, wrap_lon, t) \ + shared(use_climo, cmn, csd, vld_t, bad) \ + private(x, y, n_vld, n_thr, gp, v) + { + + // Build the grid template + GridTemplateFactory gtf; + GridTemplate* gt = gtf.buildGT(shape, width, wrap_lon); + +#pragma omp single + { + mlog << Debug(3) + << "Computing fractional coverage field using the " + << t.get_str() << " threshold and the " + << interpmthd_to_string(InterpMthd_Nbrhd) << "(" << gt->size() + << ") " << gt->getClassName() << " interpolation method.\n"; + + // Initialize the fractional coverage field + frac_dp = dp; + frac_dp.set_constant(bad_data_double); + } + + // Compute the fractional coverage meeting the threshold criteria +#pragma omp for schedule (static) + for(x=0; xgetFirstInGrid(x, y, dp.nx(), dp.ny()); + gp != NULL; + gp = gt->getNextInGrid()) { + if(is_bad_data(v = dp.get(gp->x, gp->y))) continue; + n_vld++; + if(t.check(v, + (use_climo ? cmn->get(gp->x, gp->y) : bad), + (use_climo ? csd->get(gp->x, gp->y) : bad))) n_thr++; + } + } + // Subtract off the bottom edge, shift up, and add the top. + else { + + // Subtract points from the the bottom edge + for(gp = gt->getFirstInBotEdge(); + gp != NULL; + gp = gt->getNextInBotEdge()) { + if(is_bad_data(v = dp.get(gp->x, gp->y))) continue; + n_vld--; + if(t.check(v, + (use_climo ? cmn->get(gp->x, gp->y) : bad), + (use_climo ? csd->get(gp->x, gp->y) : bad))) n_thr--; + } + + // Increment Y + gt->incBaseY(1); + + // Add points from the the top edge + for(gp = gt->getFirstInTopEdge(); + gp != NULL; + gp = gt->getNextInTopEdge()) { + if(is_bad_data(v = dp.get(gp->x, gp->y))) continue; + n_vld++; + if(t.check(v, + (use_climo ? cmn->get(gp->x, gp->y) : bad), + (use_climo ? csd->get(gp->x, gp->y) : bad))) n_thr++; + } + } + + // Check for enough valid data and compute fractional coverage + if((double)(n_vld)/gt->size() >= vld_t && n_vld != 0) { + frac_dp.set((double) n_thr/n_vld, x, y); + } + + } // end for y + + // Increment X + if(x < (dp.nx() - 1)) gt->incBaseX(1); + + } // end for x + + delete gt; + + } // End of omp parallel return; } diff --git a/met/src/basic/vx_util/handle_openmp.cc b/met/src/basic/vx_util/handle_openmp.cc new file mode 100644 index 0000000000..f23d09b4ca --- /dev/null +++ b/met/src/basic/vx_util/handle_openmp.cc @@ -0,0 +1,52 @@ +// *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* +// ** Copyright UCAR (c) 1992 - 2021 +// ** University Corporation for Atmospheric Research (UCAR) +// ** National Center for Atmospheric Research (NCAR) +// ** Research Applications Lab (RAL) +// ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +// *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* + +/////////////////////////////////////////////////////////////////////////////// + +#include + +#ifdef _OPENMP + #include "omp.h" +#endif + +#include "vx_log.h" +#include "handle_openmp.h" + +/////////////////////////////////////////////////////////////////////////////// + +void init_openmp() { + +#ifdef _OPENMP + + // If OMP_NUM_THREADS was not set, use the OpenMP API to set the thread count + // to 1 thread only. + const char* env_omp_num_threads = std::getenv("OMP_NUM_THREADS"); + if (!env_omp_num_threads) { + mlog << Debug(2) << "OMP_NUM_THREADS is not set." + << " Defaulting to 1 thread." + << " Recommend setting OMP_NUM_THREADS for faster runtimes.\n"; + omp_set_num_threads(1); + } + +#pragma omp parallel +#pragma omp single + { + mlog << Debug(2) << "OpenMP running on " + << omp_get_num_threads() << " thread(s).\n"; + } + +#else /* _OPENMP */ + + mlog << Debug(2) << "OpenMP disabled.\n"; + +#endif /* _OPENMP */ + +} + +/////////////////////////////////////////////////////////////////////////////// + diff --git a/met/src/basic/vx_util/handle_openmp.h b/met/src/basic/vx_util/handle_openmp.h new file mode 100644 index 0000000000..e2ea2e6a38 --- /dev/null +++ b/met/src/basic/vx_util/handle_openmp.h @@ -0,0 +1,24 @@ +// *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* +// ** Copyright UCAR (c) 1992 - 2021 +// ** University Corporation for Atmospheric Research (UCAR) +// ** National Center for Atmospheric Research (NCAR) +// ** Research Applications Lab (RAL) +// ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +// *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=* + +/////////////////////////////////////////////////////////////////////////////// + +#ifndef __HANDLE_OPENMP_H___ +#define __HANDLE_OPENMP_H___ + +/////////////////////////////////////////////////////////////////////////////// + +void init_openmp(); + +/////////////////////////////////////////////////////////////////////////////// + +#endif /* __HANDLE_OPENMP_H__ */ + +/////////////////////////////////////////////////////////////////////////////// + + diff --git a/met/src/tools/core/ensemble_stat/ensemble_stat.cc b/met/src/tools/core/ensemble_stat/ensemble_stat.cc index fe5d160157..ab296a2528 100644 --- a/met/src/tools/core/ensemble_stat/ensemble_stat.cc +++ b/met/src/tools/core/ensemble_stat/ensemble_stat.cc @@ -92,6 +92,8 @@ using namespace std; #include "nc_obs_util.h" #include "nc_point_obs_in.h" +#include "handle_openmp.h" + //////////////////////////////////////////////////////////////////////// static void process_command_line (int, char **); @@ -171,6 +173,9 @@ static void set_compress(const StringArray &); int main(int argc, char *argv[]) { + // Set up OpenMP (if enabled) + init_openmp(); + // Set handler to be called for memory allocation error set_new_handler(oom); diff --git a/met/src/tools/core/grid_stat/grid_stat.cc b/met/src/tools/core/grid_stat/grid_stat.cc index f420e568c4..e7055ed596 100644 --- a/met/src/tools/core/grid_stat/grid_stat.cc +++ b/met/src/tools/core/grid_stat/grid_stat.cc @@ -127,6 +127,8 @@ using namespace std; #include #include +#include "handle_openmp.h" + #include "grid_stat.h" #include "vx_statistics.h" @@ -185,6 +187,9 @@ static bool read_data_plane(VarInfo* info, DataPlane& dp, Met2dDataFile* mtddf, int main(int argc, char *argv[]) { + // Set up OpenMP (if enabled) + init_openmp(); + // Set handler to be called for memory allocation error set_new_handler(oom); diff --git a/met/src/tools/other/gen_ens_prod/gen_ens_prod.cc b/met/src/tools/other/gen_ens_prod/gen_ens_prod.cc index 1f634c38b2..786a56b390 100644 --- a/met/src/tools/other/gen_ens_prod/gen_ens_prod.cc +++ b/met/src/tools/other/gen_ens_prod/gen_ens_prod.cc @@ -45,6 +45,8 @@ using namespace std; #include "nc_obs_util.h" #include "nc_point_obs_in.h" +#include "handle_openmp.h" + //////////////////////////////////////////////////////////////////////// static void process_command_line(int, char **); @@ -81,6 +83,9 @@ static void set_ctrl_file (const StringArray &); int main(int argc, char *argv[]) { + // Set up OpenMP (if enabled) + init_openmp(); + // Set handler to be called for memory allocation error set_new_handler(oom); diff --git a/test/config/GridStatConfig_rtma b/test/config/GridStatConfig_rtma index 178a1269b1..2c3d3ba058 100644 --- a/test/config/GridStatConfig_rtma +++ b/test/config/GridStatConfig_rtma @@ -111,7 +111,7 @@ interp = { type = [ { method = NEAREST; width = 1; }, { method = UW_MEAN; width = 3; }, - { method = MIN; width = 1; }, + { method = MIN; width = 3; }, { method = MAX; width = 3; } ]; } diff --git a/test/xml/unit_grid_stat.xml b/test/xml/unit_grid_stat.xml index 1d562778f7..c31455c0ce 100644 --- a/test/xml/unit_grid_stat.xml +++ b/test/xml/unit_grid_stat.xml @@ -75,6 +75,32 @@ + + &MET_BIN;/grid_stat + + OMP_NUM_THREADS 2 + OUTPUT_PREFIX GRIB2_NAM_RTMA_NP2 + + \ + &DATA_DIR_MODEL;/grib2/nam/nam_2012040900_F012_gRtma.grib2 \ + &DATA_DIR_OBS;/rtma/rtma_2012051712_F000.grib2 \ + &CONFIG_DIR;/GridStatConfig_rtma \ + -outdir &OUTPUT_DIR;/grid_stat -v 1 + + + &OUTPUT_DIR;/grid_stat/grid_stat_GRIB2_NAM_RTMA_NP2_120000L_20120409_120000V.stat + &OUTPUT_DIR;/grid_stat/grid_stat_GRIB2_NAM_RTMA_NP2_120000L_20120409_120000V_fho.txt + &OUTPUT_DIR;/grid_stat/grid_stat_GRIB2_NAM_RTMA_NP2_120000L_20120409_120000V_ctc.txt + &OUTPUT_DIR;/grid_stat/grid_stat_GRIB2_NAM_RTMA_NP2_120000L_20120409_120000V_cts.txt + &OUTPUT_DIR;/grid_stat/grid_stat_GRIB2_NAM_RTMA_NP2_120000L_20120409_120000V_cnt.txt + &OUTPUT_DIR;/grid_stat/grid_stat_GRIB2_NAM_RTMA_NP2_120000L_20120409_120000V_sl1l2.txt + &OUTPUT_DIR;/grid_stat/grid_stat_GRIB2_NAM_RTMA_NP2_120000L_20120409_120000V_nbrctc.txt + &OUTPUT_DIR;/grid_stat/grid_stat_GRIB2_NAM_RTMA_NP2_120000L_20120409_120000V_nbrcts.txt + &OUTPUT_DIR;/grid_stat/grid_stat_GRIB2_NAM_RTMA_NP2_120000L_20120409_120000V_nbrcnt.txt + &OUTPUT_DIR;/grid_stat/grid_stat_GRIB2_NAM_RTMA_NP2_120000L_20120409_120000V_pairs.nc + + + &MET_BIN;/grid_stat