diff --git a/.github/jobs/docker_setup.sh b/.github/jobs/docker_setup.sh index ad20f64ecc..47116c0232 100755 --- a/.github/jobs/docker_setup.sh +++ b/.github/jobs/docker_setup.sh @@ -67,7 +67,7 @@ fi # skip docker push if credentials are not set -if [ -z ${DOCKER_USERNAME+x} ] || [ -z ${DOCKER_PASSWORD+x} ]; then +if [ -z "${DOCKER_USERNAME}" ] || [ -z "${DOCKER_PASSWORD}" ]; then echo "DockerHub credentials not set. Skipping docker push" exit 0 fi diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json index 2f6814059a..cc41d9c034 100644 --- a/.github/parm/use_case_groups.json +++ b/.github/parm/use_case_groups.json @@ -218,5 +218,10 @@ "category": "tc_and_extra_tc", "index_list": "3-5", "run": false + }, + { + "category": "unstructured_grids", + "index_list": "0", + "run": false } ] diff --git a/docs/_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png b/docs/_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png new file mode 100644 index 0000000000..4475eeac05 Binary files /dev/null and b/docs/_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png differ diff --git a/docs/use_cases/model_applications/unstructured_grids/README.rst b/docs/use_cases/model_applications/unstructured_grids/README.rst new file mode 100644 index 0000000000..7b8d40d0f2 --- /dev/null +++ b/docs/use_cases/model_applications/unstructured_grids/README.rst @@ -0,0 +1,4 @@ +Unstructured Grids +------------------ + +Unstructured grids used by models for numerical weather prediction. diff --git a/docs/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.py b/docs/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.py new file mode 100644 index 0000000000..dd52057d21 --- /dev/null +++ b/docs/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.py @@ -0,0 +1,160 @@ +""" +StatAnalysis: UGRID +=========================================================================== + +model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf + +""" + +########################################### +# Scientific Objective +# -------------------- +# +# This use case demonstrates the use of python embedding to ingest and perform +# verification on an unstructured grid. This foregoes the need to interpolate +# to a regular grid as a step in the verification process, thereby avoiding +# any incurred interpolation error in the process. +# +# In particular, this use case ingests a UK MET Office LFRic forecast file in +# NetCDF format, which resides in the UGRID format of the cubed-sphere. The python +# library Iris was developed to perform analysis on various UGRID formats, and is +# employed here to ingest the file as well as perform direct interpolation +# from the native forecast grid to observation locations, thereby forming matched +# pairs to pass to stat_analysis. In order to perform the interpolation using a +# nearest-neighbors approach, the geovista python package is also used to form a +# KD tree to be used in identifying the interpolation points to be used. This +# package is located at https://github.com/bjlittle/geovista/ and can be installed +# from a development version. It is also required to install the pyvista python +# package. ASCII files containing observations are also ingested. +# +# The python embedding script itself performs the interpolation in time, and +# for this use case thins the observation data in order to reduce the run time. +# It is also noted that the observations for this use case were fabricated and +# correlated observation-forecast pairs are not expected. +# + +############################################################################## +# Datasets +# -------- +# +# +# | **Data source:** UK MET Office LFRic forecast files in UGRID NetCDF format and observations in ASCII format +# +# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | The tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. +# | + +############################################################################## +# METplus Components +# ------------------ +# +# This use case utilizes the METplus StatAnalysis wrapper to search for +# files that are valid for the given case and generate a command to run +# the MET tool stat_analysis. + +############################################################################## +# METplus Workflow +# ---------------- +# +# StatAnalysis is the only tool called in this example. It processes the following +# run times: +# +# | **Valid:** 2021-05-05_00Z +# | **Forecast lead:** 12 hour +# | + +############################################################################## +# METplus Configuration +# --------------------- +# +# METplus first loads all of the configuration files found in parm/metplus_config, +# then it loads any configuration files passed to METplus via the command line +# with the -c option, i.e. -c parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf + +############################################################################## +# MET Configuration +# ----------------- +# +# METplus sets environment variables based on user settings in the METplus configuration file. +# See :ref:`How METplus controls MET config file settings` for more details. +# +# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!** +# +# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to: +# :ref:`Overriding Unsupported MET config file settings` +# +# .. note:: See the :ref:`StatAnalysis MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/met_config/STATAnalysisConfig_wrapped + +############################################################################## +# Python Embedding +# ---------------- +# +# This use case uses a Python embedding script to read input data +# +# parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py +# +# .. highlight:: python +# .. literalinclude:: ../../../../parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py +# + +############################################################################## +# Running METplus +# --------------- +# +# It is recommended to run this use case by: +# +# Passing in StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf then a user-specific system configuration file:: +# +# run_metplus.py -c /path/to/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf -c /path/to/user_system.conf +# +# The following METplus configuration variables must be set correctly to run this example.: +# +# * **INPUT_BASE** - Path to directory where sample data tarballs are unpacked (See Datasets section to obtain tarballs). +# * **OUTPUT_BASE** - Path where METplus output will be written. This must be in a location where you have write permissions +# * **MET_INSTALL_DIR** - Path to location where MET is installed locally +# +# Example User Configuration File:: +# +# [dir] +# INPUT_BASE = /path/to/sample/input/data +# OUTPUT_BASE = /path/to/output/dir +# MET_INSTALL_DIR = /path/to/met-X.Y +# +# **NOTE:** All of these items must be found under the [dir] section. +# + + +############################################################################## +# Expected Output +# --------------- +# +# A successful run will output the following both to the screen and to the logfile:: +# +# INFO: METplus has successfully finished running. +# +# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. +# Output for this use case will be found in StatAnalysis_UGRID (relative to **OUTPUT_BASE**) +# and will contain the following file: +# +# * dump.out + +############################################################################## +# Keywords +# -------- +# +# .. note:: +# +# * StatAnalysisToolUseCase +# * PythonEmbeddingFileUseCase +# * UnstructureGridsUseCase +# +# Navigate to the :ref:`quick-search` page to discover other similar use cases. +# +# +# sphinx_gallery_thumbnail_path = '_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png' diff --git a/internal/tests/use_cases/all_use_cases.txt b/internal/tests/use_cases/all_use_cases.txt index ef633503aa..a41b56b269 100644 --- a/internal/tests/use_cases/all_use_cases.txt +++ b/internal/tests/use_cases/all_use_cases.txt @@ -166,3 +166,7 @@ Category: tc_and_extra_tc 3::GridStat_fcstHAFS_obsTDR_NetCDF:: model_applications/tc_and_extra_tc/GridStat_fcstHAFS_obsTDR_NetCDF.conf:: py_embed 4::TCPairs_TCStat_fcstADECK_obsBDECK_ATCF_BasicExample:: model_applications/tc_and_extra_tc/TCPairs_TCStat_fcstADECK_obsBDECK_ATCF_BasicExample.conf 5::TCGen_fcstGFS_obsBDECK_2021season:: model_applications/tc_and_extra_tc/TCGen_fcstGFS_obsBDECK_2021season.conf + + +Category: unstructured_grids +0::StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed:: model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf:: geovista_env, py_embed diff --git a/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf new file mode 100644 index 0000000000..bbca987ddf --- /dev/null +++ b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf @@ -0,0 +1,85 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.html + +# For additional information, please see the METplus Users Guide. +# https://metplus.readthedocs.io/en/latest/Users_Guide + +### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + +PROCESS_LIST = StatAnalysis + + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +LOOP_BY = VALID + +VALID_TIME_FMT = %Y%m%d%H +VALID_BEG=2021050500 +VALID_END=2021050500 +VALID_INCREMENT = 6H + +LEAD_SEQ = 0 + + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +MODEL1_STAT_ANALYSIS_LOOKIN_DIR = python {PARM_BASE}/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py {INPUT_BASE}/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/fcst_data/lfric_ver_20210505_0000.nc {INPUT_BASE}/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/obs_data + +STAT_ANALYSIS_OUTPUT_DIR = {OUTPUT_BASE}/StatAnalysis_UGRID +STAT_ANALYSIS_OUTPUT_TEMPLATE = job.out +MODEL1_STAT_ANALYSIS_DUMP_ROW_TEMPLATE = dump.out + + +### +# StatAnalysis Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#statanalysis +### + +MODEL1 = NA +MODEL1_OBTYPE = NA + +STAT_ANALYSIS_JOB_NAME = aggregate_stat +STAT_ANALYSIS_JOB_ARGS = -out_line_type CNT -dump_row [dump_row_file] -line_type MPR -by FCST_VAR + +MODEL_LIST = +DESC_LIST = +FCST_LEAD_LIST = +OBS_LEAD_LIST = +FCST_VALID_HOUR_LIST = +FCST_INIT_HOUR_LIST = +OBS_VALID_HOUR_LIST = +OBS_INIT_HOUR_LIST = +FCST_VAR_LIST = +OBS_VAR_LIST = +FCST_UNITS_LIST = +OBS_UNITS_LIST = +FCST_LEVEL_LIST = +OBS_LEVEL_LIST = +VX_MASK_LIST = +INTERP_MTHD_LIST = +INTERP_PNTS_LIST = +FCST_THRESH_LIST = +OBS_THRESH_LIST = +COV_THRESH_LIST = +ALPHA_LIST = +LINE_TYPE_LIST = + +GROUP_LIST_ITEMS = +LOOP_LIST_ITEMS = MODEL_LIST diff --git a/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py new file mode 100644 index 0000000000..d78e17e82c --- /dev/null +++ b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py @@ -0,0 +1,203 @@ +from __future__ import print_function + +import math +import pandas as pd +import numpy as np +import os +from glob import glob +import sys +import xarray as xr +import datetime as dt +import iris +from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD +#geovista from https://github.com/bjlittle/geovista/ +import geovista as gv +import geovista.theme +from geovista.common import to_xyz +import netCDF4 +import pyvista as pv +from pykdtree.kdtree import KDTree + +from pathlib import Path +from typing import Optional + +import matplotlib.pyplot as plt + +print(f"{iris.__version__=}") +print(f"{gv.__version__=}") + +######################################################################## + +def read_ascii_obs(files): + paths = sorted(glob(files)) + datasets = [pd.read_table(p, header=None, delim_whitespace=True) for p in paths] + combined = pd.concat(datasets) + return combined + +def load_ugrid( + fname: str, + data: Optional[bool] = False, + constraint: Optional[str] = None, + verbose: Optional[bool] = False +) -> pv.PolyData: +# fname = BASE_DIR / fname + with PARSE_UGRID_ON_LOAD.context(): + cube = iris.load_cube(fname, constraint=constraint) + + if cube.ndim > 1: + cube = cube[(0,) * (cube.ndim - 1)] + + if verbose: + print(cube) + + data = cube.data if data else None + + face_node = cube.mesh.face_node_connectivity + indices = face_node.indices_by_location() + lons, lats = cube.mesh.node_coords + + mesh = gv.Transform.from_unstructured( + lons.points, + lats.points, + indices, + data=data, + start_index=face_node.start_index, + name=cube.name(), + ) + + if data is None: + mesh.active_scalars_name = None + + return mesh + +def info(mesh: pv.PolyData) -> None: + print(f"The mesh is a C{int(math.sqrt(mesh.n_cells / 6))}, with 6 panels, {int(mesh.n_cells / 6):,d} cells per panel, and {mesh.n_cells:,d} cells.") + +def find_nearest(tree, points, poi, k): + # lat/lon to xyz + xyz = to_xyz(*poi) + + # find the k nearest euclidean neighbours + dist, idxs = tree.query(xyz, k=k) + + if idxs.ndim > 1: + idxs = idxs[0] + + # retieve the associated xyz points of the k nearest neighbours + nearest = points[idxs] + + return xyz, nearest, idxs + +def to_centers(mesh: pv.PolyData) -> pv.PolyData: + tmp = mesh.copy() + tmp.clear_cell_data() + tmp.clear_point_data() + tmp.clear_field_data() + return tmp.cell_centers() + +######################################################################## +print('Python Script:\t', sys.argv[0]) + +# Input is directory of .nc lfric files and a directory of ascii obs filess + +if len(sys.argv) == 3: + # Read the input file as the first argument + input_fcst_dir = os.path.expandvars(sys.argv[1]) + input_obs_dir = os.path.expandvars(sys.argv[2]) + try: + print("Input Forecast Dir:\t" + repr(input_fcst_dir)) + print("Input Observations Dir:\t" + repr(input_obs_dir)) + + #Read all obs from directory + obs_data = read_ascii_obs(input_obs_dir+'/*.ascii') + print(obs_data.shape) + obs_data = obs_data.iloc[::1000, :]#thin for testing + obs_data = obs_data.rename(columns={0:'message_type', 1:'station_id', 2:'obs_valid_time', 3:'obs_lat', 4:'obs_lon', \ + 5:'elevation', 6:'var_name', 7:'level', 8:'height', 9:'qc_string', 10:'obs_value'}) + + obs_vars = ['UGRD', 'VGRD', 'TMP', 'RH'] + fcst_vars = ['u10m', 'v10m', 't1p5m', 'rh1p5m'] + + #open the netcdf forecast to access data values and list of times + fcst_data = xr.open_dataset(input_fcst_dir) + fcst_times = pd.to_datetime(fcst_data.coords['time_centered']) + + match_df = pd.DataFrame(columns=['message_type', 'station_id', 'obs_valid_time', 'obs_lat', 'obs_lon', \ + 'elevation', 'var_name', 'level', 'height', 'qc_string', 'obs_value', 'idx_nearest, fcst_value']) + + for idx1, (obs_var, fcst_var) in enumerate(zip(obs_vars, fcst_vars)): + + #load forecast as an iris cube + fcst_mesh = load_ugrid(input_fcst_dir, constraint=fcst_var) + info(fcst_mesh) + + #get indices of nearest cell center + fcst_centers = to_centers(fcst_mesh) + points = fcst_centers.points + tree = KDTree(points) + + #get the forecast data values loaded + fcst_df = fcst_data[fcst_var].to_dataframe() + print(fcst_df) + + #get obs data for variable + var_data = obs_data.loc[obs_data['var_name'] == obs_var].reset_index(drop=True) + + for idx2, row in var_data.iterrows(): + xyz, nearest, idx_nearest = find_nearest(tree, points, [row['obs_lat'], row['obs_lon']], k=1) + var_data.at[idx2,'idx_nearest'] = int(idx_nearest) + + #get the obs time, search for closest in the forecast data + time = dt.datetime.strptime(row['obs_valid_time'],'%Y%m%d_%H%M%S') + match_time = min(fcst_times, key=lambda d: abs(d - time)) + match_idx = np.argmin(np.abs(fcst_times - time)) + + #add matched fcst value to data + var_data.at[idx2, 'fcst_value'] = fcst_df.loc[(match_idx,int(idx_nearest)), fcst_var] + var_data.at[idx2, 'fcst_lat'] = fcst_df.loc[(match_idx,int(idx_nearest)), 'Mesh2d_face_x'] + var_data.at[idx2, 'fcst_lon'] = fcst_df.loc[(match_idx,int(idx_nearest)), 'Mesh2d_face_y'] + var_data.at[idx2, 'fcst_time'] = fcst_df.loc[(match_idx,int(idx_nearest)), 'time_centered'] + + #check results + #with pd.option_context('display.max_rows', None): + # print(var_data[['obs_lat','fcst_lat','obs_lon','fcst_lon','obs_value','fcst_value','obs_valid_time','fcst_time']]) + with pd.option_context('display.max_columns', 500, 'display.max_rows', 100, 'display.width', 500): + print(var_data) + ob_vals = var_data['obs_value'].values + f_vals = var_data['fcst_value'].values + + match_df = pd.concat([match_df, var_data], ignore_index=True) + + nlocs = len(match_df.index) + print('Number of locations in matched set: ' + str(nlocs)) + + # Add additional columns + match_df['lead'] = '000000' + match_df['MPR'] = 'MPR' + match_df['nobs'] = nlocs + match_df['index'] = range(0,nlocs) + match_df['na'] = 'NA' + match_df['QC'] = '0' + + # Arrange columns in MPR format + cols = ['na','na','lead','obs_valid_time','obs_valid_time','lead','obs_valid_time', + 'obs_valid_time','var_name','na','lead','var_name','na','na', + 'var_name','na','na','lead','na','na','na','na','MPR', + 'nobs','index','station_id','obs_lat','obs_lon', + 'level','na','fcst_value','obs_value', + 'QC','na','na'] + + match_df = match_df[cols] + + # Into a list and all to strings + mpr_data = [list( map(str,i) ) for i in match_df.values.tolist() ] + + except NameError: + print("Can't find the input files or the variables.") + print("Variables in this file:\t" + repr(var_list)) +else: + print("ERROR: ugrid_lfric_mpr.py -> Must specify directory of files.\n") + sys.exit(1) + +######################################################################## +