Commit

Merge pull request #1 from dtcenter/develop

Develop

mrinalbiswas authored Feb 2, 2022
2 parents 3d288b9 + 8e77616 commit c1a1592
Showing 124 changed files with 4,974 additions and 1,949 deletions.
8 changes: 7 additions & 1 deletion .github/jobs/get_use_case_commands.py
@@ -129,7 +129,8 @@ def main(categories, subset_list, work_dir=None,

setup_env, py_embed_arg = handle_automation_env(host_name, reqs, work_dir)

use_case_cmds = []
# use status variable to track if any use cases failed
use_case_cmds = ['status=0']
for use_case in use_case_by_requirement.use_cases:
# add parm/use_cases path to config args if they are conf files
config_args = []
@@ -147,7 +148,12 @@
f" {py_embed_arg}{test_settings_conf}"
f" config.OUTPUT_BASE={output_base}")
use_case_cmds.append(use_case_cmd)
# check exit code from use case command and
# set status to non-zero value on error
use_case_cmds.append("if [ $? != 0 ]; then status=1; fi")

# if any use cases failed, force non-zero exit code with false
use_case_cmds.append("if [ $status != 0 ]; then false; fi")
# add commands to set up environment before use case commands
group_commands = f"{setup_env}{';'.join(use_case_cmds)}"
all_commands.append((group_commands, reqs))
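
For reference, a minimal sketch of the command-building pattern above, using
hypothetical use case commands in place of the real run_metplus.py calls; joining
the list on ';' lets a single shell invocation run every use case and still exit
non-zero if any of them failed::

    # hypothetical stand-ins for the use case commands built above
    use_case_cmds = ['status=0']
    for use_case_cmd in ['run_metplus.py caseA.conf config.OUTPUT_BASE=/data/output',
                         'run_metplus.py caseB.conf config.OUTPUT_BASE=/data/output']:
        use_case_cmds.append(use_case_cmd)
        # record a failure without stopping the remaining use cases
        use_case_cmds.append("if [ $? != 0 ]; then status=1; fi")

    # force a non-zero exit code for the whole group if any use case failed
    use_case_cmds.append("if [ $status != 0 ]; then false; fi")
    group_commands = ';'.join(use_case_cmds)
    print(group_commands)
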
9 changes: 9 additions & 0 deletions .github/jobs/set_job_controls.sh
@@ -13,6 +13,7 @@ run_use_cases=true
run_save_truth_data=false
run_all_use_cases=false
run_diff=false
external_trigger=false

# run all use cases and diff logic for pull request
if [ "${GITHUB_EVENT_NAME}" == "pull_request" ]; then
@@ -25,6 +26,12 @@ if [ "${GITHUB_EVENT_NAME}" == "pull_request" ]; then
run_all_use_cases=true
run_diff=true
fi
# run all use cases and diff logic for external workflow trigger
elif [ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]; then
run_use_cases=true
run_all_use_cases=true
run_diff=true
external_trigger=true
# run all use cases and save truth data if -ref branch and not PR
elif [ "${GITHUB_REF: -4}" == -ref ]; then
run_use_cases=true
@@ -98,13 +105,15 @@ echo run_use_cases=${run_use_cases} >> job_control_status
echo run_save_truth_data=${run_save_truth_data} >> job_control_status
echo run_all_use_cases=${run_all_use_cases} >> job_control_status
echo run_diff=${run_diff} >> job_control_status
echo external_trigger=${external_trigger} >> job_control_status
echo Job Control Settings:
cat job_control_status

echo ::set-output name=run_get_image::$run_get_image
echo ::set-output name=run_get_input_data::$run_get_input_data
echo ::set-output name=run_diff::$run_diff
echo ::set-output name=run_save_truth_data::$run_save_truth_data
echo ::set-output name=external_trigger::$external_trigger

# get use cases to run
.github/jobs/get_use_cases_to_run.sh $run_use_cases $run_all_use_cases $run_unit_tests
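
For reference, after a workflow_dispatch event the relevant portion of the
job_control_status file written above would be expected to read as follows
(controls set earlier in the script, such as run_get_image, are omitted here)::

    run_use_cases=true
    run_save_truth_data=false
    run_all_use_cases=true
    run_diff=true
    external_trigger=true
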
10 changes: 10 additions & 0 deletions .github/parm/use_case_groups.json
@@ -59,6 +59,11 @@
"index_list": "0-2",
"run": false
},
{
"category": "marine_and_cryosphere",
"index_list": "3-4",
"run": false
},
{
"category": "medium_range",
"index_list": "0",
@@ -149,6 +154,11 @@
"index_list": "11",
"run": false
},
{
"category": "s2s",
"index_list": "12",
"run": false
},
{
"category": "space_weather",
"index_list": "0-1",
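
Each index_list value selects use case numbers within the named category, matching
the numbering in internal_tests/use_cases/all_use_cases.txt. A hypothetical helper,
shown only to illustrate how an index_list string maps to individual indices (the
repository's own parsing code may differ)::

    def expand_index_list(index_list):
        """Expand an index_list string such as '3-4' or '12' into a list of ints."""
        indices = []
        for part in index_list.split(','):
            if '-' in part:
                start, end = part.split('-')
                indices.extend(range(int(start), int(end) + 1))
            else:
                indices.append(int(part))
        return indices

    print(expand_index_list('3-4'))  # [3, 4]
    print(expand_index_list('12'))   # [12]
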
28 changes: 24 additions & 4 deletions .github/workflows/testing.yml
@@ -13,8 +13,31 @@ on:
types: [opened, reopened, synchronize]
paths-ignore:
- docs/**
workflow_dispatch:
inputs:
repository:
description: 'Repository that triggered workflow'
required: true
sha:
description: 'Commit hash that triggered the event'
required: true
ref:
description: 'Branch that triggered event'
actor:
description: 'User that triggered the event'
pusher_email:
description: 'Email address of user who triggered push event'

jobs:
event_info:
name: "Trigger: ${{ github.event_name != 'workflow_dispatch' && github.event_name || github.event.inputs.repository }} ${{ github.event_name != 'workflow_dispatch' && 'local' || github.event.inputs.pusher_email }} ${{ github.event_name != 'workflow_dispatch' && 'event' || github.event.inputs.sha }}"
runs-on: ubuntu-latest
steps:
- name: Print GitHub values for reference
env:
GITHUB_CONTEXT: ${{ toJson(github) }}
run: echo "$GITHUB_CONTEXT"

job_control:
name: Determine which jobs to run
runs-on: ubuntu-latest
@@ -25,12 +48,9 @@ jobs:
run_get_input_data: ${{ steps.job_status.outputs.run_get_input_data }}
run_diff: ${{ steps.job_status.outputs.run_diff }}
run_save_truth_data: ${{ steps.job_status.outputs.run_save_truth_data }}
external_trigger: ${{ steps.job_status.outputs.external_trigger }}
steps:
- uses: actions/checkout@v2
- name: Print GitHub values for reference
env:
GITHUB_CONTEXT: ${{ toJson(github) }}
run: echo "$GITHUB_CONTEXT"
- name: Set job controls
id: job_status
run: .github/jobs/set_job_controls.sh
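
As a hedged illustration, automation in another repository could trigger this
workflow_dispatch event through the GitHub REST API as sketched below; the
repository names, branch, token handling, and input values are assumptions for
illustration, not taken from this change::

    import os
    import requests

    url = ("https://api.github.com/repos/OWNER/REPO"
           "/actions/workflows/testing.yml/dispatches")
    payload = {
        "ref": "develop",  # branch whose copy of testing.yml should run
        "inputs": {
            "repository": "OWNER/TRIGGERING-REPO",
            "sha": "abc1234",
            "ref": "refs/heads/develop",
            "actor": "some-user",
            "pusher_email": "some-user@example.com",
        },
    }
    response = requests.post(
        url,
        json=payload,
        headers={
            "Authorization": f"token {os.environ['GITHUB_TOKEN']}",
            "Accept": "application/vnd.github.v3+json",
        },
    )
    response.raise_for_status()  # GitHub responds with 204 No Content on success
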
3 changes: 2 additions & 1 deletion ci/docker/docker_data/Dockerfile
@@ -17,7 +17,8 @@ RUN if [ "x${MOUNTPT}" == "x" ]; then \
exit 1; \
fi

ENV CASE_DIR=/data/input/METplus_Data
ARG DATA_DIR=/data/input/METplus_Data
ENV CASE_DIR=${DATA_DIR}
RUN mkdir -p ${CASE_DIR}

RUN for URL in `echo ${TARFILE_URL} | tr "," " "`; do \
2 changes: 0 additions & 2 deletions ci/docker/docker_env/scripts/metplotpy_env.sh
@@ -10,7 +10,6 @@
# matplotlib==3.3.0
# scipy==1.5.1
# plotly==4.9.0
# pingouin==0.3.8
# cartopy==0.18.0
# eofs==1.3.0
# cmocean==2.0
@@ -34,7 +33,6 @@ conda create -y --clone ${BASE_ENV} --name ${ENV_NAME}
conda install -y --name ${ENV_NAME} -c conda-forge matplotlib==3.3.0
conda install -y --name ${ENV_NAME} -c conda-forge scipy==1.5.1
conda install -y --name ${ENV_NAME} -c conda-forge plotly==4.9.0
conda install -y --name ${ENV_NAME} -c conda-forge pingouin==0.3.8
conda install -y --name ${ENV_NAME} -c conda-forge cartopy==0.18.0
conda install -y --name ${ENV_NAME} -c conda-forge eofs==1.3.0
conda install -y --name ${ENV_NAME} -c conda-forge cmocean==2.0
89 changes: 82 additions & 7 deletions docs/Contributors_Guide/add_use_case.rst
@@ -144,8 +144,20 @@ Use Case Rules
- The use case should be run by someone other than the author to ensure that it
runs smoothly outside of the development environment set up by the author.

.. _use_case_documentation:
.. _memory-intense-use-cases:

Use Cases That Exceed Github Actions Memory Limit
-------------------------------------------------

Below is a list of use cases in the repository that cannot be run in GitHub Actions
because their memory usage exceeds what is available there. They have been tested by
reviewers, cleared of any other issues, and can be used by METplus users in the same
manner as all other use cases.

- model_applications/marine_and_cryosphere/GridStat_fcstRTOFS_obsGHRSST_climWOA_sst

.. _use_case_documentation:

Document New Use Case
---------------------

@@ -191,21 +203,32 @@ use case OR category directory for a model_applications use case
* Users are encouraged to copy an existing documentation file and modify it
to describe the new use case.

* Update any references to the .conf file to use the correct name
* Update any references to the .conf file to use the correct name.

* Update the Scientific Objective section to describe the use case
* Update the Scientific Objective section to describe the use case.

* Update the description of the input data in the Datasets section
* Update the description of the input data in the Datasets section.

* Update the list of tools used in the METplus Components section
* Update the list of External Dependencies (if applicable) to include any
required Python packages. Update the
`METplus Components Python Requirements <https://docs.google.com/spreadsheets/d/1Lf-yxZBXTTnBaqCOWUk-jdP9RpaLfil_s-KKH29CkKU/edit#gid=0>`_
spreadsheet. If the package is already listed in the spreadsheet, add
a link to the documentation page for this new use case, following the
format in the spreadsheet. If the package is not already listed, update
the spreadsheet to include the name of the required package, the version,
the METplus component (e.g. METplus wrappers, METcalcpy, METplotpy), the
source, a brief description, and a link to this new use case that uses
this new Python package.

* Update the list of tools used in the METplus Components section.

* Update the list of run times in the METplus Workflow section
* Update the list of run times in the METplus Workflow section.

* Update the list of keywords, referring to :ref:`quick-search` for
a list of possible keywords to use (Note: The link text for the
keywords must match the actual keyword exactly or it will not
show up in the search, i.e. **ASCII2NCToolUseCase** must match
https://metplus.readthedocs.io/en/latest/search.html?q=**ASCII2NCToolUseCase**
https://metplus.readthedocs.io/en/latest/search.html?q=**ASCII2NCToolUseCase**).

* Add an image to use as the thumbnail (if desired). Images can be added
to the docs/_static directory and should be named <category>-<conf>.png
Expand Down Expand Up @@ -1024,6 +1047,24 @@ with "Use Case Tests." Click on the job and search for the use case config
filename in the log output by using the search box on the top right of the
log output.

If the use case fails in GitHub Actions but runs successfully in the user's environment,
potential reasons include:

- Errors providing input data (see :ref:`use_case_input_data`)
- Using hard-coded paths from the user's machine
- Referencing variables set in the user's configuration file or local environment
- Memory usage of the use case exceeds the memory available in the GitHub Actions environment

GitHub Actions runners have `limited memory <https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources>`_
available, and a use case that exceeds that limit will fail. A failure caused by exceeding
the memory allocation in a Python Embedding script may result in an unclear error message.
If you suspect that this is the case, consider using a Python memory profiler to check the
Python script's memory usage. If the use case exceeds the limit, try to pare
down the data held in memory and use less memory-intensive Python routines.
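
For example, a minimal peak-memory check using only the Python standard library
(the workload below is a placeholder for the data-loading portion of the embedding
script)::

    import tracemalloc

    tracemalloc.start()

    # placeholder for the data-loading portion of the Python embedding script
    data = [float(i) for i in range(1_000_000)]

    current, peak = tracemalloc.get_traced_memory()
    print(f"current: {current / 1e6:.1f} MB, peak: {peak / 1e6:.1f} MB")

    # show the source lines responsible for the largest allocations
    for stat in tracemalloc.take_snapshot().statistics('lineno')[:5]:
        print(stat)

    tracemalloc.stop()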

If memory mitigation cannot move the use case’s memory usage below the Github Actions limit,
see :ref:`exceeded-Github-Actions` for next steps.

Verify that the use case ran in a reasonable amount of time
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -1039,6 +1080,40 @@ run the set of use cases is now above 20 minutes or so, consider creating a
new job for the new use case. See the :ref:`subset_category` section and the
multiple medium_range jobs for an example.


.. _exceeded-Github-Actions:

Use Cases That Exceed Memory Allocations of Github Actions
----------------------------------------------------------

If a use case that uses Python embedding fails in GitHub Actions because it exceeds
the memory limit, and memory mitigation steps have not lowered its memory usage
sufficiently, please take the following steps.

- Document the Github Actions failure in the Github use case issue.
Utilize a Python memory profiler to identify as specifically as possible
where the script exceeds the memory limit.
- Add the use case to the :ref:`memory-intense-use-cases` list.
- In the internal_tests/use_cases/all_use_cases.txt file, ensure that the
use case appears last in its respective category.
Change the number in front of the new use case to an 'X', preceded
by the '#' character::

#X::GridStat_fcstRTOFS_obsGHRSST_climWOA_sst::model_applications/marine_and_cryosphere/GridStat_fcstRTOFS_obsGHRSST_climWOA_sst.conf, model_applications/marine_and_cryosphere/GridStat_fcstRTOFS_obsGHRSST_climWOA_sst/ci_overrides.conf:: icecover_env, py_embed

- In the **.github/parm/use_case_groups.json** file, remove the entry for the new
use case that was added in the :ref:`add_new_category_to_test_runs` step.
This will stop the use case from running on a pull request.
- Push these two updated files to your branch in GitHub and confirm that the
GitHub Actions checks now complete successfully.
- When creating the pull request (see :ref:`create-a-pull-request`), inform the
reviewer of the GitHub Actions failure. The reviewer should confirm that the use
case runs successfully when executed manually, that the memory profiler output
confirms that the Python embedding script exceeds the GitHub Actions limit, and
that there are no other GitHub Actions failures.

.. _create-a-pull-request:

Create a Pull Request
=====================

14 changes: 6 additions & 8 deletions docs/Contributors_Guide/basic_components.rst
@@ -33,13 +33,11 @@ executable, relative to MET_BIN_DIR.
The init function also calls the parent's initialization function
using super() function::

def __init__(self, config, instance=None, config_overrides=None):
def __init__(self, config, instance=None):
self.app_name = "ascii2nc"
self.app_path = os.path.join(config.getdir('MET_BIN_DIR', ''),
self.app_name)
super().__init__(config,
instance=instance,
config_overrides=config_overrides)
super().__init__(config, instance=instance)

The above code block is an excerpt from the ASCII2NCWrapper,
found in metplus/wrappers/ascii2nc_wrapper.py.
@@ -302,12 +300,12 @@ data type, extra info, children, and nicknames.
* extra: Additional info as a comma separated string (see extra_args above)
* children: Dictionary defining a nested dictionary where the key is the name
of the sub-directory and the value is the item info (see items above)
* nicknames: List of METplus variable names (with app name excluded) to also
* nicknames: List of METplus variable names to also
search and use if it is set. For example, the GridStat variable mask.poly is
set by the METplus config variable GRID_STAT_MASK_POLY. However, in older
versions of the METplus wrappers, the variable used was
GRID_STAT_VERIFICATION_MASK_TEMPLATE. To preserve support for this name, the
nickname can be set to ['VERIFICATION_MASK_TEMPLATE'] and the old variable
nickname can be set to [f'{self.app_name.upper()}_VERIFICATION_MASK_TEMPLATE'] and the old variable
will be checked if GRID_STAT_MASK_POLY is not set.

Values must be set to None to preserve the order.
@@ -320,7 +318,7 @@ CompareGriddedWrapper and is used by GridStat, PointStat, and EnsembleStat::

def handle_climo_cdf_dict(self):
self.add_met_config_dict('climo_cdf', {
'cdf_bins': ('float', None, None, ['CLIMO_CDF_BINS']),
'cdf_bins': ('float', None, None, [f'{self.app_name.upper()}_CLIMO_CDF_BINS']),
'center_bins': 'bool',
'write_bins': 'bool',
})
@@ -329,7 +327,7 @@ This function handles setting the climo_cdf dictionary. The METplus config
variable that fits the format {APP_NAME}_{DICTIONARY_NAME}_{VARIABLE_NAME},
e.g. GRID_STAT_CLIMO_CDF_CDF_BINS for GridStat's climo_cdf.cdf_bins, is
queried first. However, this default name is a little redundant, so adding
the nickname 'CLIMO_CDF_BINS' allows the user to set the variable
the nickname 'GRID_STAT_CLIMO_CDF_BINS' allows the user to set the variable
GRID_STAT_CLIMO_CDF_BINS instead.

There are many MET config dictionaries that only contain beg and end to define
6 changes: 2 additions & 4 deletions docs/Contributors_Guide/create_wrapper.rst
@@ -96,13 +96,11 @@ If the application is a MET tool, then set self.app_path to the full path
of the tool under **MET_BIN_DIR**.
See the Basic Components :ref:`bc_init_function` section for more information::

def __init__(self, config, instance=None, config_overrides=None):
def __init__(self, config, instance=None):
self.app_name = 'new_tool'
self.app_path = os.path.join(config.getdir('MET_BIN_DIR', ''),
self.app_name)
super().__init__(config,
instance=instance,
config_overrides=config_overrides)
super().__init__(config, instance=instance)

Read Configuration Variables
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15 changes: 8 additions & 7 deletions docs/Release_Guide/release_steps/common/update_dtc_website.rst
@@ -28,14 +28,14 @@ Update DTC Website
* Add Link: Link text should be "User's Guide" and the URL should be the top
level directory of the User's Guide hosted on the web. Beta releases can
use "develop" in the URL, but for official releases, please ensure the
link uses the branch name (e.g. main_v4.0) as opposed to the tag name
(e.g. v4.0.0). For example, use
"https://metplus.readthedocs.io/en/main_v4.0/Users_Guide/" and NOT
"https://metplus.readthedocs.io/en/v4.0.0/Users_Guide/"
link uses the branch name (e.g. main_vX.Y) as opposed to the tag name
(e.g. vX.Y.Z). For example, use
"https://metplus.readthedocs.io/en/main_vX.Y/Users_Guide/" and NOT
"https://metplus.readthedocs.io/en/vX.Y.Z/Users_Guide/"

* Add Link: Link text should be "Existing Builds and Docker" and the URL
should be the latest Existing Builds page, i.e.
https://dtcenter.org/community-code/metplus/metplus-4-0-existing-builds
https://dtcenter.org/community-code/metplus/metplus-X-Y-existing-builds
(If creating a new official release, be sure to add a new *Existing Builds
and Docker* page, if one was not already created.)

Expand All @@ -44,8 +44,6 @@ Update DTC Website

* Click on "Create Release".

* Click on "Save".

* Update the existing releases, as needed.

* For a development release, change any previous *Development*
@@ -58,3 +56,6 @@
releases.

* |otherWebsiteUpdates|

* Click on "Save".
