Skip to content

Commit

Permalink
Feature 1453 use case iodav2 python embedding (#1930)
Browse files Browse the repository at this point in the history
Co-authored-by: George McCabe <23407799+georgemccabe@users.noreply.github.com>
  • Loading branch information
willmayfield and georgemccabe committed Nov 11, 2022
1 parent c930429 commit ecb04d7
Show file tree
Hide file tree
Showing 7 changed files with 345 additions and 1 deletion.
Empty file modified .github/jobs/docker_setup.sh
100755 → 100644
Empty file.
2 changes: 1 addition & 1 deletion .github/parm/use_case_groups.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
},
{
"category": "data_assimilation",
"index_list": "0",
"index_list": "0-1",
"run": false
},
{
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
"""
StatAnalysis: IODAv2
===========================================================================
model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf
"""

###########################################
# Scientific Objective
# --------------------
#
# This use case demonstrates the Stat-Analysis tool and ingestion of HofX NetCDF files
# that have been output from the Joint Effort for Data assimilation Integration (JEDI)
# data assimilation system. JEDI uses IODA version 2 formatted files, which are NetCDF files
# with certain requirements of variables and naming conventions. These files
# hold observations to be assimilated into forecasts, in this case taken from the JEDI software
# test data, which contained a small number of Global observation-forecast pairs
# derived from the hofx application.
#
# UFO is a component of HofX, which maps the background forecast to observation space
# to form O minus B pairs. The HofX application of JEDI takes the input IODAv2 files and
# adds an additional variable which is the forecast value as interpolated to the
# observation location. These HofX files are used as input to form Matched Pair (MPR)
# formatted lists via Python embedding. In this case, Stat-Analysis then performs an aggregate_stat
# job and outputs statistics in an ascii file.
#
# This use case adopts the IODAv2 formatted NetCDF files, which replace the previous variable
# formatting scheme to make use of NetCDF groups.

##############################################################################
# Datasets
# --------
#
#
# | **Data source:** JEDI HofX output files in IODAv2 format
#
# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Go to the METplus releases page and download the sample data for the appropriate release: https://github.com/dtcenter/METplus/releases
# | The tarball should be unpacked into the directory that you will set as the value of INPUT_BASE. See the `Running METplus`_ section for more information.
# |

##############################################################################
# METplus Components
# ------------------
#
# This use case utilizes the METplus StatAnalysis wrapper to search for
# files that are valid for the given case and generate a command to run
# the MET tool stat_analysis.

##############################################################################
# METplus Workflow
# ----------------
#
# StatAnalysis is the only tool called in this example. It processes the following
# run times:
#
# | **Valid:** 2018-04-15_00Z
# | **Forecast lead:** 0 hour
# |

##############################################################################
# METplus Configuration
# ---------------------
#
# METplus first loads all of the configuration files found in parm/metplus_config,
# then it loads any configuration files passed to METplus via the command line
# with the -c option, i.e. -c parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf
#
# .. highlight:: bash
# .. literalinclude:: ../../../../parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf

##############################################################################
# MET Configuration
# -----------------
#
# METplus sets environment variables based on user settings in the METplus configuration file.
# See :ref:`How METplus controls MET config file settings<metplus-control-met>` for more details.
#
# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!**
#
# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to:
# :ref:`Overriding Unsupported MET config file settings<met-config-overrides>`
#
# .. note:: See the :ref:`StatAnalysis MET Configuration<stat-analysis-met-conf>` section of the User's Guide for more information on the environment variables used in the file below:
#
# .. highlight:: bash
# .. literalinclude:: ../../../../parm/met_config/STATAnalysisConfig_wrapped

##############################################################################
# Python Embedding
# ----------------
#
# This use case uses a Python embedding script to read input data
#
# parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py
#
# .. highlight:: python
# .. literalinclude:: ../../../../parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py
#

##############################################################################
# Running METplus
# ---------------
#
# It is recommended to run this use case by:
#
# Passing in StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf then a user-specific system configuration file::
#
# run_metplus.py -c /path/to/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf -c /path/to/user_system.conf
#
# The following METplus configuration variables must be set correctly to run this example:
#
# * **INPUT_BASE** - Path to directory where sample data tarballs are unpacked (See Datasets section to obtain tarballs).
# * **OUTPUT_BASE** - Path where METplus output will be written. This must be in a location where you have write permissions.
# * **MET_INSTALL_DIR** - Path to location where MET is installed locally
#
# Example User Configuration File::
#
# [dir]
# INPUT_BASE = /path/to/sample/input/data
# OUTPUT_BASE = /path/to/output/dir
# MET_INSTALL_DIR = /path/to/met-X.Y
#
# **NOTE:** All of these items must be found under the [dir] section.
#


##############################################################################
# Expected Output
# ---------------
#
# A successful run will output the following both to the screen and to the logfile::
#
# INFO: METplus has successfully finished running.
#
# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated.
# Output for this use case will be found in StatAnalysis_IODAv2 (relative to **OUTPUT_BASE**)
# and will contain the following file:
#
# * dump.out

##############################################################################
# Keywords
# --------
#
# .. note::
#
# * StatAnalysisToolUseCase
# * PythonEmbeddingFileUseCase
# * DataAssimilationUseCase
#
# Navigate to the :ref:`quick-search` page to discover other similar use cases.
#
#
# sphinx_gallery_thumbnail_path = '_static/data_assimilation-StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.png'
1 change: 1 addition & 0 deletions internal/tests/use_cases/all_use_cases.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ Category: climate

Category: data_assimilation
0::StatAnalysis_fcstHAFS_obsPrepBufr_JEDI_IODA_interface::model_applications/data_assimilation/StatAnalysis_fcstHAFS_obsPrepBufr_JEDI_IODA_interface.conf
1::StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed::model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf:: py_embed


Category: marine_and_cryosphere
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
[config]

# Documentation for this use case can be found at
# https://metplus.readthedocs.io/en/latest/generated/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.html

# For additional information, please see the METplus Users Guide.
# https://metplus.readthedocs.io/en/latest/Users_Guide

###
# Processes to run
# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list
###

PROCESS_LIST = StatAnalysis


###
# Time Info
# LOOP_BY options are INIT, VALID, RETRO, and REALTIME
# If set to INIT or RETRO:
# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set
# If set to VALID or REALTIME:
# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set
# LEAD_SEQ is the list of forecast leads to process
# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control
###

LOOP_BY = VALID

VALID_TIME_FMT = %Y%m%d%H
VALID_BEG=2018041500
VALID_END=2018041500
VALID_INCREMENT = 12H

LEAD_SEQ = 0


###
# File I/O
# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info
###

# Instead of a plain directory, the lookin value is a Python embedding command:
# the read_iodav2_mpr.py script followed by the single HofX NetCDF file that
# the script reads as its only argument.
MODEL1_STAT_ANALYSIS_LOOKIN_DIR = python {PARM_BASE}/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py {INPUT_BASE}/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/sample_hofx_output_sondes.nc4

# Output is written under StatAnalysis_IODAv2 relative to OUTPUT_BASE.
# dump.out is the file listed as expected output in the use case documentation.
# NOTE(review): job.out is presumably where the job's statistics are written - confirm against the StatAnalysis wrapper documentation.
STAT_ANALYSIS_OUTPUT_DIR = {OUTPUT_BASE}/StatAnalysis_IODAv2
STAT_ANALYSIS_OUTPUT_TEMPLATE = job.out
MODEL1_STAT_ANALYSIS_DUMP_ROW_TEMPLATE = dump.out


###
# StatAnalysis Settings
# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#statanalysis
###

MODEL1 = NA
MODEL1_OBTYPE = NA

# Run the aggregate_stat job described in the use case documentation.
# -line_type MPR matches the 'MPR' line type label that read_iodav2_mpr.py
# writes into every row it produces.
# NOTE(review): [dump_row_file] is presumably filled in by the wrapper from
# MODEL1_STAT_ANALYSIS_DUMP_ROW_TEMPLATE - confirm in the StatAnalysis wrapper documentation.
# NOTE(review): -out_line_type CNT with -by FCST_VAR presumably yields one set of
# CNT statistics per forecast variable - confirm in the MET Stat-Analysis documentation.
STAT_ANALYSIS_JOB_NAME = aggregate_stat
STAT_ANALYSIS_JOB_ARGS = -out_line_type CNT -dump_row [dump_row_file] -line_type MPR -by FCST_VAR

MODEL_LIST =
DESC_LIST =
FCST_LEAD_LIST =
OBS_LEAD_LIST =
FCST_VALID_HOUR_LIST =
FCST_INIT_HOUR_LIST =
OBS_VALID_HOUR_LIST =
OBS_INIT_HOUR_LIST =
FCST_VAR_LIST =
OBS_VAR_LIST =
FCST_UNITS_LIST =
OBS_UNITS_LIST =
FCST_LEVEL_LIST =
OBS_LEVEL_LIST =
VX_MASK_LIST =
INTERP_MTHD_LIST =
INTERP_PNTS_LIST =
FCST_THRESH_LIST =
OBS_THRESH_LIST =
COV_THRESH_LIST =
ALPHA_LIST =
LINE_TYPE_LIST =

GROUP_LIST_ITEMS =
LOOP_LIST_ITEMS = MODEL_LIST
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
from __future__ import print_function

import pandas as pd
import os
from glob import glob
import sys
import xarray as xr
import datetime as dt

########################################################################

# Convert a JEDI HofX output file (IODAv2 NetCDF groups) into a list of
# matched pair (MPR) rows.
#
# Usage: read_iodav2_mpr.py <hofx_file.nc|hofx_file.nc4>
#
# On success the module-level list ``mpr_data`` holds one list of strings per
# valid observation, for every variable found in the 'hofx' group.
# On any input problem the script prints an error and exits with status 1 so
# the caller never continues with ``mpr_data`` undefined.

print('Python Script:\t', sys.argv[0])

# Input is .nc or .nc4 file, given as the one and only argument
if len(sys.argv) != 2:
    print("ERROR: read_iodav2_mpr.py -> Must specify input file.\n")
    sys.exit(1)

# Read the input file as the first argument
input_path = os.path.expandvars(sys.argv[1])
print("Input File:\t" + repr(input_path))

# Defined before the try block so the KeyError handler can always report it
hofx_vars = []

try:
    # Each dataset is opened in a ``with`` block so it is closed even when a
    # later statement raises.

    # The MetaData group provides the base dataframe
    with xr.open_dataset(input_path, group='MetaData') as ioda_data:
        ioda_df = ioda_data.to_dataframe()

    # One '<var>@hofx' column per variable in the hofx group
    with xr.open_dataset(input_path, group='hofx') as ioda_hofx_data:
        hofx_vars = list(ioda_hofx_data.keys())
        for var_name in hofx_vars:
            ioda_df[var_name + '@hofx'] = ioda_hofx_data[var_name]

    # Add columns for needed attributes, for each variable present for hofx
    for attribute in ['ObsValue', 'ObsType', 'EffectiveQC']:
        with xr.open_dataset(input_path, group=attribute) as ioda_attr_data:
            for var_name in hofx_vars:
                ioda_df[var_name + '@' + attribute] = ioda_attr_data[var_name]

    nlocs = len(ioda_df.index)
    print('Number of locations in set: ' + str(nlocs))

    # Reformat the time strings from YYYY-MM-DDTHH:MM:SSZ to YYYYMMDD_HHMMSS
    ioda_df['datetime'] = [
        dt.datetime.strptime(stamp, '%Y-%m-%dT%H:%M:%SZ').strftime('%Y%m%d_%H%M%S')
        for stamp in ioda_df['datetime']
    ]

    # Set up MPR data
    mpr_data = []

    for var_name in hofx_vars:

        # Set up the needed columns
        ioda_df_var = ioda_df[['datetime', 'station_id', var_name + '@ObsType',
                               'latitude', 'longitude', 'air_pressure',
                               var_name + '@hofx', var_name + '@ObsValue',
                               var_name + '@EffectiveQC']]

        # Cut down to locations with valid ObsValues. The explicit copy makes
        # the column assignments below act on an independent frame instead of
        # a slice of ioda_df (avoids pandas' SettingWithCopyWarning).
        valid_obs = abs(ioda_df_var[var_name + '@ObsValue']) < 1e6
        ioda_df_var = ioda_df_var[valid_obs].copy()
        nlocs = len(ioda_df_var.index)
        print(var_name + ' has ' + str(nlocs) + ' valid obs.')

        # Add additional columns
        ioda_df_var['lead'] = '000000'
        ioda_df_var['MPR'] = 'MPR'
        ioda_df_var['nobs'] = nlocs
        ioda_df_var['index'] = range(0, nlocs)
        ioda_df_var['varname'] = var_name
        ioda_df_var['na'] = 'NA'

        # Arrange columns in MPR format; columns are deliberately repeated
        # and 'na' fills every field this script has no value for
        cols = ['na', 'na', 'lead', 'datetime', 'datetime', 'lead', 'datetime',
                'datetime', 'varname', 'na', 'lead', 'varname', 'na', 'na',
                var_name + '@ObsType', 'na', 'na', 'lead', 'na', 'na', 'na', 'na', 'MPR',
                'nobs', 'index', 'station_id', 'latitude', 'longitude',
                'air_pressure', 'na', var_name + '@hofx', var_name + '@ObsValue',
                var_name + '@EffectiveQC', 'na', 'na']

        # Into a list and all to strings
        mpr_data.extend(
            list(map(str, row)) for row in ioda_df_var[cols].values.tolist()
        )

    print("Total Length:\t" + repr(len(mpr_data)))

except OSError as err:
    # A missing file raises FileNotFoundError, which is an OSError.
    # NOTE(review): a missing NetCDF group presumably also surfaces here - confirm.
    print("ERROR: read_iodav2_mpr.py -> Can't read the input file: " + str(err))
    sys.exit(1)
except KeyError as err:
    # A variable or column expected above is not present in the file
    print("ERROR: read_iodav2_mpr.py -> Missing variable " + str(err))
    print("HofX variables in this file:\t" + repr(hofx_vars))
    sys.exit(1)

########################################################################

0 comments on commit ecb04d7

Please sign in to comment.