From 19c0b7e179ae04a00e5a64055cae3b5a8c0e8769 Mon Sep 17 00:00:00 2001
From: j-opatz
Date: Thu, 25 May 2023 16:43:31 -0600
Subject: [PATCH 1/6] added first use case, need to finish docs and add to list

---
 ...stMPAS_obsSATCORPS_lowAndTotalCloudFrac.py | 114 +++
 ...MPAS_obsSATCORPS_lowAndTotalCloudFrac.conf | 206 ++++
 .../read_input_data.py | 911 ++++++++++++++++++
 3 files changed, 1231 insertions(+)
 create mode 100644 docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py
 create mode 100644 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf
 create mode 100755 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/read_input_data.py

diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py
new file mode 100644
index 0000000000..1e26265475
--- /dev/null
+++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py
@@ -0,0 +1,114 @@
+"""
+GridStat: Cloud Fractions with Various Settings
+===============================================
+
+model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf
+
+"""
+##############################################################################
+# Scientific Objective
+# --------------------
+#
+# This use case captures various statistical measures of two model comparisons
+# for low and total cloud fraction with different neighborhood and probability
+# settings, both to provide internal model metrics and to aid in future model updates.
+#
+
+##############################################################################
+# Datasets
+# --------
+#
+# | **Forecast:** Model for Prediction Across Scales (MPAS)
+# | **Observations:** Satellite ClOud and Radiation Property retrieval System (SatCORPS)
+# | **Grid:** GPP 17km masking region
+#
+# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to go to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases
+# | This tarball should be unpacked into the directory that you will set as the value of INPUT_BASE. See the 'Running METplus' section for more information.
+#
+
+##############################################################################
+# METplus Components
+# ------------------
+#
+# This use case utilizes the METplus GridStat wrapper to generate a
+# command to run the MET tool GridStat if all required files are found.
+# Both the MPAS forecast and SatCORPS observation fields are read through
+# Python Embedding, which is called using the PYTHON_NUMPY keyword in the
+# forecast and observation input template settings. The same Python script
+# processes both datasets and is passed the input file, the model name, the
+# variable name being analyzed, the initialization and valid times, and a
+# flag to indicate whether the field passed is a forecast or an observation.
+# Two separate forecast fields (total and low cloud fraction) are verified
+# against two respective observation fields, and the process is repeated with
+# 3 GridStat instance names, each with different settings for regridding,
+# neighborhood evaluation, thresholding, output line types, and output prefix names.
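+#
+# As an example, the total cloud fraction forecast field in the use case
+# configuration file (shown in full in the METplus Configuration section below)
+# is defined by passing the Python script and a single colon-separated argument
+# string as the field name::
+#
+#   FCST_VAR1_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/diag.{valid?fmt=%Y-%m-%d_%H}.00.00_latlon.nc:{MODEL}:totalCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:1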
+
+##############################################################################
+# METplus Workflow
+# ----------------
+#
+# GridStat is the only MET tool called in this example.
+# It processes the following run time:
+#
+# | **Init:** 2020-07-23 00Z
+# | **Forecast lead:** 36 hour
+# |
+# Because instance names are used, GridStat will run 3 times for this 1 initialization time.
+
+##############################################################################
+# METplus Configuration
+# ---------------------
+#
+# METplus first loads the default configuration file found in parm/metplus_config,
+# then it loads any configuration files passed to METplus via the command line:
+# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf
+#
+# .. highlight:: bash
+# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf
+
+##############################################################################
+# MET Configuration
+# -----------------
+#
+# METplus sets environment variables based on user settings in the METplus configuration file.
+# See :ref:`How METplus controls MET config file settings` for more details.
+#
+# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!**
+#
+# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to:
+# :ref:`Overriding Unsupported MET config file settings`
+#
+# .. note:: See the :ref:`GridStat MET Configuration` section of the User's Guide for more information on the environment variables used in the file below:
+#
+# .. highlight:: bash
+# .. literalinclude:: ../../../../parm/met_config/GridStatConfig_wrapped
+
+##############################################################################
+# Python Embedding
+# ----------------
+#
+# This use case utilizes 1 Python script to read and process both forecast and
+# observation fields.
+# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/read_input_data.py
+#
+# .. highlight:: bash
+# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/read_input_data.py
+
+##############################################################################
+# Running METplus
+# ---------------
+#
+# Pass the use case configuration file to the run_metplus.py script
+# along with any user-specific system configuration files if desired::
+#
+#   run_metplus.py /path/to/METplus/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf /path/to/user_system.conf
+#
+# See :ref:`running-metplus` for more information.
+
+##############################################################################
+# Expected Output
+# ---------------
+#
+# A successful run will output the following both to the screen and to the logfile::
+#
+#   INFO: METplus has successfully finished running.
+#
+# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated.
+# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac
+# (relative to **OUTPUT_BASE**)
+# and will contain STAT files and NetCDF matched pairs files from each of the three
+# GridStat instances, with the output prefixes MPAS_F36_CloudFracs,
+# MPAS_F36_CloudFracs_NBR, and MPAS_F36_CloudFracs_PROB.
+
+##############################################################################
+# Keywords
+# --------
+#
+# .. note::
+#
+#   * GridStatToolUseCase
+#   * PythonEmbeddingFileUseCase
+#   * NetCDFFileUseCase
+#
+# Navigate to the :ref:`quick-search` page to discover other similar use cases.
+#
+# sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.png'
diff --git a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf
new file mode 100644
index 0000000000..1020ad1661
--- /dev/null
+++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf
@@ -0,0 +1,206 @@
+[config]
+
+# Documentation for this use case can be found at
+# https://metplus.readthedocs.io/en/latest/generated/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.html
+
+# For additional information, please see the METplus Users Guide.
+# https://metplus.readthedocs.io/en/latest/Users_Guide + +# ### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + + +PROCESS_LIST = GridStat, GridStat(nbr), GridStat(prob) + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +LOOP_BY = INIT +INIT_TIME_FMT = %Y%m%d%H +INIT_BEG=2020072300 +INIT_END=2020072300 +INIT_INCREMENT = 12H + +LEAD_SEQ = 36 + +LOOP_ORDER = times + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +FCST_GRID_STAT_INPUT_DIR = +FCST_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +OBS_GRID_STAT_INPUT_DIR = +OBS_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +GRID_STAT_CLIMO_MEAN_INPUT_DIR = +GRID_STAT_CLIMO_MEAN_INPUT_TEMPLATE = + +GRID_STAT_CLIMO_STDEV_INPUT_DIR = +GRID_STAT_CLIMO_STDEV_INPUT_TEMPLATE = + +GRID_STAT_OUTPUT_DIR = {OUTPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac +GRID_STAT_OUTPUT_TEMPLATE = + + +### +# Field Info +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#field-info +### + +MODEL = MPAS +OBTYPE = SATCORPS + +CONFIG_DIR = {PARM_BASE}/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac + +FCST_VAR1_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/diag.{valid?fmt=%Y-%m-%d_%H}.00.00_latlon.nc:{MODEL}:totalCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:1 +FCST_VAR1_LEVELS = +FCST_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + +FCST_VAR2_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/diag.{valid?fmt=%Y-%m-%d_%H}.00.00_latlon.nc:{MODEL}:lowCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:1 +FCST_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + +OBS_VAR1_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/GEO-MRGD.{valid?fmt=%Y%j.%H}00.GRID.NC:{OBTYPE}:totalCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR1_LEVELS = +OBS_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + +OBS_VAR2_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/GEO-MRGD.{valid?fmt=%Y%j.%H}00.GRID.NC:{OBTYPE}:lowCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + + +### +# GridStat Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#gridstat +### + +#LOG_GRID_STAT_VERBOSITY = 2 + +GRID_STAT_CONFIG_FILE = {PARM_BASE}/met_config/GridStatConfig_wrapped + + +GRID_STAT_REGRID_TO_GRID = FCST +GRID_STAT_REGRID_METHOD = BILIN +GRID_STAT_REGRID_WIDTH = 2 + + 
+GRID_STAT_DESC = + +FCST_GRID_STAT_FILE_WINDOW_BEGIN = 0 +FCST_GRID_STAT_FILE_WINDOW_END = 0 +OBS_GRID_STAT_FILE_WINDOW_BEGIN = 0 +OBS_GRID_STAT_FILE_WINDOW_END = 0 + +GRID_STAT_NEIGHBORHOOD_WIDTH = 1 +GRID_STAT_NEIGHBORHOOD_SHAPE = SQUARE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >=0.5 + +GRID_STAT_ONCE_PER_FIELD = False + +FCST_IS_PROB = false + +FCST_GRID_STAT_PROB_THRESH = ==0.1 + +OBS_IS_PROB = false + +OBS_GRID_STAT_PROB_THRESH = ==0.1 + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_F{lead?fmt=%H}_CloudFracs + + +GRID_STAT_OUTPUT_FLAG_FHO = STAT +GRID_STAT_OUTPUT_FLAG_CTC = STAT +GRID_STAT_OUTPUT_FLAG_CTS = STAT +GRID_STAT_OUTPUT_FLAG_CNT = STAT +GRID_STAT_OUTPUT_FLAG_SL1L2 = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = STAT + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_CLIMO = FALSE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + + +GRID_STAT_MASK_POLY = {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/GPP_17km_60S_60N_mask.nc + +[nbr] + +FCST_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SFP20, >SFP30, >SFP40, >SFP50, >SFP60, >SFP70, >SFP80 + +OBS_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SOP20, >SOP30, >SOP40, >SOP50, >SOP60, >SOP70, >SOP80 + +GRID_STAT_NEIGHBORHOOD_WIDTH = 3, 5, 7, 9 +GRID_STAT_NEIGHBORHOOD_SHAPE = CIRCLE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >0.0 + +GRID_STAT_OUTPUT_FLAG_FHO = NONE +GRID_STAT_OUTPUT_FLAG_CTC = NONE +GRID_STAT_OUTPUT_FLAG_CTS = NONE +GRID_STAT_OUTPUT_FLAG_CNT = NONE +GRID_STAT_OUTPUT_FLAG_SL1L2 = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTC = STAT +GRID_STAT_OUTPUT_FLAG_NBRCTS = STAT +GRID_STAT_OUTPUT_FLAG_NBRCNT = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = NONE + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_NBRHD = TRUE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_F{lead?fmt=%H}_CloudFracs_NBR + +[prob] + +OBS_VAR1_THRESH = gt0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +OBS_VAR2_THRESH = gt0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +FCST_IS_PROB = TRUE + +FCST_VAR1_THRESH = >0.1, >0.2, >0.3, >0.4, >0.5, >0.6, >0.7, >0.8, >0.9, >1.0 +FCST_VAR2_THRESH = >0.1, >0.2, >0.3, >0.4, >0.5, >0.6, >0.7, >0.8, >0.9, >1.0 +GRID_STAT_NEIGHBORHOOD_WIDTH = 3, 5, 7, 9 +GRID_STAT_NEIGHBORHOOD_SHAPE = CIRCLE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >0.0 + +GRID_STAT_OUTPUT_FLAG_FHO = NONE +GRID_STAT_OUTPUT_FLAG_CTC = NONE +GRID_STAT_OUTPUT_FLAG_CTS = NONE +GRID_STAT_OUTPUT_FLAG_CNT = NONE +GRID_STAT_OUTPUT_FLAG_SL1L2 = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTC = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTS = NONE +GRID_STAT_OUTPUT_FLAG_NBRCNT = NONE +GRID_STAT_OUTPUT_FLAG_GRAD = NONE +GRID_STAT_OUTPUT_FLAG_PCT = STAT +GRID_STAT_OUTPUT_FLAG_PSTD = STAT +GRID_STAT_OUTPUT_FLAG_PJC = STAT +GRID_STAT_OUTPUT_FLAG_PRC = STAT + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_NBRHD = FALSE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_F{lead?fmt=%H}_CloudFracs_PROB + diff --git 
a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/read_input_data.py b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/read_input_data.py new file mode 100755 index 0000000000..aa436135a6 --- /dev/null +++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/read_input_data.py @@ -0,0 +1,911 @@ +#this code was provided by Craig Schwartz +#and is largely unaltered from its original +#function. + +#from __future__ import print_function +import os +import sys +import numpy as np +import datetime as dt +from netCDF4 import Dataset # http://code.google.com/p/netcdf4-python/ +from scipy.interpolate import NearestNDInterpolator, LinearNDInterpolator +#### for Plotting +import matplotlib.cm as cm +import matplotlib.axes as maxes +import matplotlib.pyplot as plt +from mpl_toolkits.axes_grid1 import make_axes_locatable +#from mpl_toolkits.basemap import Basemap +import fnmatch +import pygrib +import pickle as pk +##### + +########################################### + +missing_values = -9999.0 # for MET + +# UPP top layer bounds (Pa) for cloud layers +PTOP_LOW_UPP = 64200. # low for > 64200 Pa +PTOP_MID_UPP = 35000. # mid between 35000-64200 Pa +PTOP_HIGH_UPP = 15000. # high between 15000-35000 Pa + +# Values for 4 x 4 contingency table +Na, Nb, Nc, Nd = 1, 2, 3, 4 +Ne, Nf, Ng, Nh = 5, 6, 7, 8 +Ni, Nj, Nk, Nl = 9, 10, 11, 12 +Nm, Nn, No, Np = 13, 14, 15, 16 + +# Notes: +# 1) Entry for 'point' is for point-to-point comparison and is all dummy data (except for gridType) that is overwritten by point2point +# 2) ERA5 on NCAR CISL RDA changed at some point. Old is ERA5_2017 (not used anymore), new is ERA5, which we'll use for 2020 data +griddedDatasets = { + 'MERRA2' : { 'gridType':'LatLon', 'latVar':'lat', 'latDef':[-90.0,0.50,361], 'lonVar':'lon', 'lonDef':[-180.0,0.625,576], 'flipY':True, 'ftype':'nc'}, + 'SATCORPS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[-180.0,0.3125,1152], 'flipY':False, 'ftype':'nc' }, + 'ERA5_2017': { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.7848769072,0.281016829130516,640], 'lonVar':'longitude', 'lonDef':[0.0,0.28125,1280], 'flipY':False, 'ftype':'nc' }, + 'ERA5' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'GFS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'GALWEM' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':True, 'ftype':'grib'}, + 'GALWEM17' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.921875,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'grib'}, + 'WWMCA' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'MPAS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc'}, + 'SAT_WWMCA_MEAN' : { 'gridType':'LatLon', 'latVar':'lat','latDef':[-90.0,0.25,721], 'lonVar':'lon', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'point' : { 'gridType':'LatLon', 
'latVar':'latitude','latDef':[-90.0,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'nc'}, +} + #TODO:Correct one, but MET can ingest a Gaussian grid only in Grib2 format (from Randy B.) + #'ERA5' : { 'gridType':'Gaussian', 'nx':1280, 'ny':640, 'lon_zero':0, 'latVar':'latitude', 'lonVar':'longitude', 'flipY':False, }, + +#GALWEM, both 17-km and 0.25-degree +lowCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':3, 'typeOfFirstFixedSurface':10, 'shortName':'lcc' } +midCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':4, 'typeOfFirstFixedSurface':10, 'shortName':'mcc' } +highCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':5, 'typeOfFirstFixedSurface':10, 'shortName':'hcc' } +totalCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':10, 'shortName':'tcc' } +cloudTopHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':12, 'typeOfFirstFixedSurface':3, 'shortName':'cdct' } +cloudBaseHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':11, 'typeOfFirstFixedSurface':2, 'shortName':'cdcb' } + +#GFS +lowCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':214, 'shortName':'tcc' } +midCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':224, 'shortName':'tcc' } +highCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':234, 'shortName':'tcc' } + +#WWMCA +totalCloudFrac_WWMCA = { 'parameterName':71, 'typeOfLevel':'entireAtmosphere', 'level':0 } + +cloudTopHeightLev1_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':1 } +cloudTopHeightLev2_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':2 } +cloudTopHeightLev3_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':3 } +cloudTopHeightLev4_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':4 } +cloudTopHeight_WWMCA = [ cloudTopHeightLev1_WWMCA, cloudTopHeightLev2_WWMCA, cloudTopHeightLev3_WWMCA, cloudTopHeightLev4_WWMCA ] + +cloudBaseHeightLev1_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':1 } +cloudBaseHeightLev2_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':2 } +cloudBaseHeightLev3_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':3 } +cloudBaseHeightLev4_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':4 } +cloudBaseHeight_WWMCA = [ cloudBaseHeightLev1_WWMCA, cloudBaseHeightLev2_WWMCA, cloudBaseHeightLev3_WWMCA, cloudBaseHeightLev4_WWMCA ] + +verifVariablesModel = { + 'binaryCloud' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'totalCloudFrac' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'lowCloudFrac' : {'GFS':[lowCloudFrac_GFS], 'GALWEM17':[lowCloudFrac_GALWEM], 'GALWEM':[lowCloudFrac_GALWEM], 'MPAS':['cldfrac_low_UM']}, + 'midCloudFrac' : {'GFS':[midCloudFrac_GFS], 'GALWEM17':[midCloudFrac_GALWEM], 'GALWEM':[midCloudFrac_GALWEM], 'MPAS':['cldfrac_mid_UM']}, + 'highCloudFrac' : {'GFS':[highCloudFrac_GFS], 'GALWEM17':[highCloudFrac_GALWEM], 'GALWEM':[highCloudFrac_GALWEM], 'MPAS':['cldfrac_high_UM']}, + 'cloudTopHeight' : {'GFS':[''] , 'GALWEM17':[cloudTopHeight_GALWEM], 'GALWEM':[cloudTopHeight_GALWEM], 'MPAS':['cldht_top_UM']}, + 'cloudBaseHeight' : {'GFS':[''] , 'GALWEM17':[cloudBaseHeight_GALWEM], 'GALWEM':[cloudBaseHeight_GALWEM], 'MPAS':['cldht_base_UM']}, 
+} + +cloudFracCatThresholds = '>0, <10.0, >=10.0, >=20.0, >=30.0, >=40.0, >=50.0, >=60.0, >=70.0, >=80.0, >=90.0' # MET format string +brightnessTempThresholds = '<280.0, <275.0, <273.15, <270.0, <265.0, <260.0, <255.0, <250.0, <245.0, <240.0, <235.0, <230.0, <225.0, <220.0, <215.0, <210.0, <=SFP1, <=SFP5, <=SFP10, <=SFP25, <=SFP50, >=SFP50, >=SFP75, >=SFP90, >=SFP95, >=SFP99' +verifVariables = { + 'binaryCloud' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['TCC'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'NA', 'thresholds':'>0.0', 'interpMethod':'nearest' }, + 'totalCloudFrac' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['tcc'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'lowCloudFrac' : { 'MERRA2':['CLDLOW'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['lcc'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'midCloudFrac' : { 'MERRA2':['CLDMID'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['MCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'highCloudFrac' : { 'MERRA2':['CLDHGH'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['HCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'cloudTopTemp' : { 'MERRA2':['CLDTMP'], 'SATCORPS':['cloud_temperature_top_level'], 'ERA5':[''] , 'units':'K', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopPres' : { 'MERRA2':['CLDPRS'], 'SATCORPS':['cloud_pressure_top_level'], 'ERA5':[''] , 'units':'hPa', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopHeight' : { 'MERRA2':[''] , 'SATCORPS':['cloud_height_top_level'], 'ERA5':[''] , 'WWMCA':cloudTopHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudBaseHeight': { 'MERRA2':[''] , 'SATCORPS':['cloud_height_base_level'], 'ERA5':['cbh'], 'WWMCA':cloudBaseHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudCeiling' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'m', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'brightnessTemp' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'K', 'thresholds':brightnessTempThresholds, 'interpMethod':'bilin'}, +} + +# Combine the two dictionaries +# Only reason verifVariablesModel exists is just for space--verifVaribles gets too long if we keep adding more datasets +for key in verifVariablesModel.keys(): + x = verifVariablesModel[key] + for key1 in x.keys(): + verifVariables[key][key1] = x[key1] + +#f = '/glade/u/home/schwartz/cloud_verification/GFS_grib_0.25deg/2018112412/gfs.0p25.2018112412.f006.grib2' +#grbs = pygrib.open(f) +#idx = pygrib.index(f,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') +#model = 'GFS' +#variable = 'totCloudCover' +#x = verifVariablesModel[variable][model] # returns a list, whose ith element is a dictionary +# e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) +#idx(parameterCategory=x[0]['parameterCategory'],parameterNumber=x[0]['parameterNumber'],typeOfFirstFixedSurface=x[0]['typeOfFirstFixedSurface']) + +# to read in an environmental variable +#x = os.getenv('a') # probably type string no matter what + +########### + +def getThreshold(variable): + x = verifVariables[variable]['thresholds'] + print(x) # needed for python 3 to read variable into csh variable + return x + +def getInterpMethod(variable): + x = 
verifVariables[variable]['interpMethod'].upper() + print(x) # needed for python 3 to read variable into csh variable + return x + +def getTotalCloudFrac(source,data): + if source == 'SATCORPS': + # x = data[0][0,:,:,0] * 1.0E-2 # scaling + x = (data[0][0,:,:,1] + data[0][0,:,:,2] + data[0][0,:,:,3])*1.0E-2 # scaling + # y = data[0] + # x = np.sum( y[:,:,:,1:4],axis=3) + elif source == 'MERRA2': +# x = ( data[0][0,:,:]+data[1][0,:,:]+data[2][0,:,:] ) *100.0 # the ith element of data is a numpy array + x = data[0][0,:,:] * 100.0 # the ith element of data is a numpy array + print(x.min(), x.max()) + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + elif source == 'SAT_WWMCA_MEAN': + x = data[0][0,:,:] # already in % + else: + x = data[0] + + # This next line is WRONG. + # Missing should be set to missing + # Then, the non-missing values are 1s and 0s + #output = np.where(x > 0.0, x, 0.0) + #output = np.where(x < 0.0, -9999.0, x) # missing. currently used for SATCORPS + + x = np.where( x < 0.0 , 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + return x + +def getBinaryCloud(source,data): + y = getTotalCloudFrac(source,data) + # keep NaNs as is, but then set everything else to either 100% or 0% + x = np.where( np.isnan(y), y, np.where(y > 0.0, 100.0, 0.0) ) + return x + +def getLayerCloudFrac(source,data,layer): + if source == 'SATCORPS': + if layer.lower().strip() == 'low' : i = 1 + if layer.lower().strip() == 'mid' : i = 2 + if layer.lower().strip() == 'high' : i = 3 + x = data[0][0,:,:,i] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 100.0 + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + else: + x = data[0] + + x = np.where( x < 0.0, 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + + return x + +def getCloudTopTemp(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopPres(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-1 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 1.0E-2 # scaling [Pa] -> [hPa] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud top height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmax(tmp,axis=0) # get maximum cloud top height across all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud top shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def 
getCloudBaseHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud base height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmin(tmp,axis=0) # get lowest cloud base over all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud base shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def getCloudCeiling(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] #TBD + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] # TBD + except: x = data[0][0,:,:] + return x + +# add other functions for different variables + +########### + +def getDataArray(inputFile,source,variable,dataSource): + # 1) inputFile: File name--either observations or forecast + # 2) source: Obsevation source (e.g., MERRA, SATCORP, etc.) + # 3) variable: Variable to verify + # 4) dataSource: If 1, process forecast file. If 2 process obs file. + +# # specifying names here temporarily. file names should be passed in to python from shell script +# if source == 'merra': nc_file = '/gpfs/fs1/scratch/schwartz/MERRA/MERRA2_400.tavg1_2d_rad_Nx.20181101.nc4' +# elif source == 'satcorp': nc_file = '/glade/scratch/bjung/met/test_satcorps/GEO-MRGD.2018334.0000.GRID.NC' +# elif source == 'era5': nc_file = '/glade/scratch/bjung/met/test_era5/e5.oper.fc.sfc.instan.128_164_tcc.regn320sc.2018111606_2018120112.nc' + + source = source.upper().strip() # Force uppercase and get rid of blank spaces, for safety + + print('dataSource = ',dataSource) + + ftype = griddedDatasets[source]['ftype'].lower().strip() + + # Get file handle + if ftype == 'nc': + nc_fid = Dataset(inputFile, "r", format="NETCDF4") + #nc_fid.set_auto_scale(True) + elif ftype == 'grib': + if source == 'WWMCA': + idx = pygrib.index(inputFile,'parameterName','typeOfLevel','level') + else: + idx = pygrib.index(inputFile,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') + + # dataSource == 1 means forecast, 2 means obs +# if dataSource == 1: varsToRead = verifVariablesModel[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list +# if dataSource == 2: varsToRead = verifVariables[variable][source] # returns a list + varsToRead = verifVariables[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list + + print('Trying to read ',inputFile) + + # Get lat/lon information--currently not used + #latVar = griddedDatasets[source]['latVar'] + #lonVar = griddedDatasets[source]['lonVar'] + #lats = np.array(nc_fid.variables[latVar][:]) # extract/copy the data + #lons = np.array(nc_fid.variables[lonVar][:] ) + + #print(lats.max()) + #print(lons.max()) + + # one way to deal with scale factors + # probably using something like nc_fid.set_auto_scale(True) is better... 
+ #latMax = lats.max() + #while latMax > 90.0: + # lons = lons * 0.1 + # lats = lats * 0.1 + # latMax = lats.max() + + # get data + data = [] + for v in varsToRead: + if ftype == 'grib': + if source == 'WWMCA': + x = idx(parameterName=v['parameterName'],typeOfLevel=v['typeOfLevel'],level=v['level'])[0] # by getting element 0, you get a pygrib message + else: + # e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) + if ( variable == 'cloudTopHeight' or variable == 'cloudBaseHeight') and source == 'GALWEM17': + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[1] # by getting element 1, you get a pygrib message + else: + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[0] # by getting element 0, you get a pygrib message + if x.shortName != v['shortName']: print('Name mismatch!') + #ADDED BY JOHN O + print(x) + print('Reading ', x.shortName, 'at level ', x.typeOfFirstFixedSurface) + read_var = x.values # same x.data()[0] + read_missing = x.missingValue + print('missing value = ',read_missing) + + # The missing value (read_missing) for GALWEM17 and GALWEM cloud base/height is 9999, which is not the best choice because + # those could be actual values. So we need to use the masked array part (below) to handle which + # values are missing. We also set read_missing to something unphysical to essentially disable it. + # Finally, if we don't change the 'missingValue' property in the GRIB2 file we are eventually outputting, + # the bitmap will get all messed up, because it will be based on 9999 instead of $missing_values + if variable == 'cloudTopHeight' or variable == 'cloudBaseHeight': + read_missing = -9999. + x['missingValue'] = read_missing + if source == 'GALWEM17': + #These are masked numpy arrays, with mask = True where there is a missing value (no cloud) + #Use np.ma.filled to create an ndarray where mask = True values are set to np.nan + read_var = np.ma.filled(read_var.astype(read_var.dtype), np.nan) + elif ftype == 'nc': + read_var = nc_fid.variables[v] # extract/copy the data + try: + read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + except: + read_missing = -9999. # set a default missing value. probably only need to do this for MPAS + + print('Reading ', v) + + this_var = np.array( read_var ) # to numpy array + #print(read_missing, np.nan) + this_var = np.where( this_var==read_missing, np.nan, this_var ) + #print(this_var.shape) + data.append(this_var) # ith element of the list is a NUMPY ARRAY for the ith variable + #print(type(this_var)) + #print(type(data)) + + # Call a function to get the variable of interest. 
+ # Add a new function for each variable + if variable == 'binaryCloud': raw_data = getBinaryCloud(source,data) + if variable == 'totalCloudFrac': raw_data = getTotalCloudFrac(source,data) + if variable == 'lowCloudFrac': raw_data = getLayerCloudFrac(source,data,'low') + if variable == 'midCloudFrac': raw_data = getLayerCloudFrac(source,data,'mid') + if variable == 'highCloudFrac': raw_data = getLayerCloudFrac(source,data,'high') + if variable == 'cloudTopTemp': raw_data = getCloudTopTemp(source,data) + if variable == 'cloudTopPres': raw_data = getCloudTopPres(source,data) + if variable == 'cloudTopHeight': raw_data = getCloudTopHeight(source,data) + if variable == 'cloudBaseHeight': raw_data = getCloudBaseHeight(source,data) + if variable == 'cloudCeiling': raw_data = getCloudCeiling(source,data) + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + # Array met_data is passed to MET + # Graphics should plot $met_data to make sure things look correct + if griddedDatasets[source]['flipY']: + print('flipping ',source,' data about y-axis') + met_data=np.flip(raw_data,axis=0).astype(float) + else: + met_data=raw_data.astype(float) + + # Make plotting optional or Just use plot_data_plane +# plt_data=np.where(met_data<0, np.nan, met_data) +# map=Basemap(projection='cyl',llcrnrlat=-90,urcrnrlat=90,llcrnrlon=-180,urcrnrlon=180,resolution='c') +# map.drawcoastlines() +# map.drawcountries() +# map.drawparallels(np.arange(-90,90,30),labels=[1,1,0,1]) +# map.drawmeridians(np.arange(0,360,60),labels=[1,1,0,1]) +# plt.contourf(lons,lats,plt_data,20,origin='upper',cmap=cm.Greens) #cm.gist_rainbow) +# title=source+"_"+variable+"_"+str(validTime) +# plt.title(title) +# plt.colorbar(orientation='horizontal') +# plt.savefig(title+".png") + + # If a forecast file, output a GRIB file with + # 1 record containing the met_data + # This is a hack, because right now, MET python embedding doesn't work with pygrib, + # so output the data to a temporary file, and then have MET read the temporary grib file. + # Starting with version 9.0 of MET, the hack isn't needed, and MET python embedding works with pygrib + outputFcstFile = False # MUST be True for MET version < 9.0. 
For MET 9.0+, optional + if dataSource == 1 and ftype == 'grib': + if outputFcstFile: + grbtmp = x + grbtmp['values']=met_data + grbout = open('temp_fcst.grb2','ab') + grbout.write(grbtmp.tostring()) + grbout.close() # Close the outfile GRIB file + print('Successfully output temp_fcst.grb2') + + # Close files + if ftype == 'grib': idx.close() # Close the input GRIB file + if ftype == 'nc': nc_fid.close() # Close the netCDF file + + return met_data + +def obsError(fcstData,obsErrorFile,validDate,dataSource): + + print('Adding noise to the cloud fraction fields') + print('Using obsErrorFile',obsErrorFile) + + # First load the obsError information + #obsErrorFile = 'ob_errors.pk' + infile = open(obsErrorFile,'rb') + binEdges, binStddev = pk.load(infile) # 'numpy.ndarray' types + infile.close() + + # Get 1d forecast data + shape = fcstData.shape + fcst = fcstData.flatten() + + # Set random number seed based on valid time and model + if dataSource.upper().strip() == 'MPAS': ii = 10 + elif dataSource.upper().strip() == 'GALWEM': ii = 20 + elif dataSource.upper().strip() == 'GFS': ii = 30 + np.random.seed(int(validDate*.1 + ii)) + + # Find which bin the data is in + for i in range(0,len(binEdges)-1): + idx = np.where( (fcst >= binEdges[i]) & (fcst < binEdges[i+1]) )[0] + n = len(idx) # number of points in the ith bin + if n > 0: # check for empty bins + randVals = np.random.normal(0,binStddev[i],n) + fcst[idx] = fcst[idx] + randVals + + # bound forecast values to between 0 and 100% + fcst = np.where( fcst < 0.0, 0.0, fcst) + fcst = np.where( fcst > 100.0, 100.0, fcst) + + # now reshape forecast data back to 2D + output = fcst.reshape(shape) + + # data will have NaNs where bad. + return output + +def getFcstCloudFrac(cfr,pmid,psfc,layerDefinitions): # cfr is cloud fraction(%), pmid is 3D pressure(Pa), psfc is surface pressure (Pa) code from UPP ./INITPOST.F + + if pmid.shape != cfr.shape: # sanity check + print('dimension mismatch bewteen cldfra and pressure') + sys.exit() + + nlocs, nlevs = pmid.shape + + if len(psfc) != nlocs: # another sanity check + print('dimension mismatch bewteen cldfra and surface pressure') + sys.exit() + + cfracl = np.zeros(nlocs) + cfracm = np.zeros(nlocs) + cfrach = np.zeros(nlocs) + + for i in range(0,nlocs): + + PTOP_HIGH = PTOP_HIGH_UPP + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc[i] + PTOP_MID = 0.45*psfc[i] + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP + PTOP_MID = PTOP_MID_UPP + + idxLow = np.where( pmid[i,:] >= PTOP_LOW)[0] # using np.where with just 1 argument returns tuple + idxMid = np.where( (pmid[i,:] < PTOP_LOW) & (pmid[i,:] >= PTOP_MID))[0] + idxHigh = np.where( (pmid[i,:] < PTOP_MID) & (pmid[i,:] >= PTOP_HIGH))[0] + + # use conditions in case all indices are missing + if (len(idxLow) >0 ): cfracl[i] = np.max( cfr[i,idxLow] ) + if (len(idxMid) >0 ): cfracm[i] = np.max( cfr[i,idxMid] ) + if (len(idxHigh) >0 ): cfrach[i] = np.max( cfr[i,idxHigh] ) + + tmp = np.vstack( (cfracl,cfracm,cfrach)) # stack the rows into one 2d array + cldfraMax = np.max(tmp,axis=0) # get maximum value across low/mid/high for each pixel (minimum overlap assumption) + + # This is the fortran code put into python format...double loop unnecessary and slow + #for i in range(0,nlocs): + # for k in range(0,nlevs): + # if pmid(i,k) >= PTOP_LOW: + # cfracl(i) = np.max( [cfracl(i),cfr(i,k)] ) # Low + # elif pmid(i,k) < PTOP_LOW and pmid(i,k) >= PTOP_MID: + # cfracm(i) = np.max( [cfracm(i),cfr(i,k)] ) # Mid + # elif pmid(i,k) < PTOP_MID 
and pmid(i,k) >= PTOP_HIGH: # High + # cfrach(i) = np.max( [cfrach(i),cfr(i,k)] ) + + return cfracl, cfracm, cfrach, cldfraMax + +def getGOES16LatLon(g16_data_file): + + # Start timer + startTime = dt.datetime.utcnow() + + # designate dataset + g16nc = Dataset(g16_data_file, 'r') + + # GOES-R projection info and retrieving relevant constants + proj_info = g16nc.variables['goes_imager_projection'] + lon_origin = proj_info.longitude_of_projection_origin + H = proj_info.perspective_point_height+proj_info.semi_major_axis + r_eq = proj_info.semi_major_axis + r_pol = proj_info.semi_minor_axis + + # Data info + lat_rad_1d = g16nc.variables['x'][:] + lon_rad_1d = g16nc.variables['y'][:] + + # close file when finished + g16nc.close() + g16nc = None + + # create meshgrid filled with radian angles + lat_rad,lon_rad = np.meshgrid(lat_rad_1d,lon_rad_1d) + + # lat/lon calc routine from satellite radian angle vectors + + lambda_0 = (lon_origin*np.pi)/180.0 + + a_var = np.power(np.sin(lat_rad),2.0) + (np.power(np.cos(lat_rad),2.0)*(np.power(np.cos(lon_rad),2.0)+(((r_eq*r_eq)/(r_pol*r_pol))*np.power(np.sin(lon_rad),2.0)))) + b_var = -2.0*H*np.cos(lat_rad)*np.cos(lon_rad) + c_var = (H**2.0)-(r_eq**2.0) + + r_s = (-1.0*b_var - np.sqrt((b_var**2)-(4.0*a_var*c_var)))/(2.0*a_var) + + s_x = r_s*np.cos(lat_rad)*np.cos(lon_rad) + s_y = - r_s*np.sin(lat_rad) + s_z = r_s*np.cos(lat_rad)*np.sin(lon_rad) + + lat = (180.0/np.pi)*(np.arctan(((r_eq*r_eq)/(r_pol*r_pol))*((s_z/np.sqrt(((H-s_x)*(H-s_x))+(s_y*s_y)))))) + lon = (lambda_0 - np.arctan(s_y/(H-s_x)))*(180.0/np.pi) + + # End timer + endTime = dt.datetime.utcnow() + time = (endTime - startTime).microseconds / (1000.0*1000.0) + print('took %f4.1 seconds to get GOES16 lat/lon'%(time)) + + return lon,lat # lat/lon are 2-d arrays + +# -- +def getGOESRetrivalData(goesFile,goesVar): + + if not os.path.exists(goesFile): + print(goesFile+' not there. exit') + sys.exit() + + # First get GOES lat/lon + goesLon2d, goesLat2d = getGOES16LatLon(goesFile) # 2-d arrays + goesLon = goesLon2d.flatten() # 1-d arrays + goesLat = goesLat2d.flatten() + + # Now open the file and get the data we want + nc_goes = Dataset(goesFile, "r", format="NETCDF4") + + # If the next line is true (it should be), this indicates the variable needs to be treated + # as an "unsigned 16-bit integer". This is a pain. So we must use the "astype" method + # to change the variable type BEFORE applying scale_factor and add_offset. 
After the conversion + # we then can manually apply the scale factor and offset + #goesVar = 'PRES' + goesVar = goesVar.strip() # for safety + if nc_goes.variables[goesVar]._Unsigned.lower().strip() == 'true': + nc_goes.set_auto_scale(False) # Don't automatically apply scale_factor and add_offset to variable + goesData2d = np.array( nc_goes.variables[goesVar]).astype(np.uint16) + goesData2d = goesData2d * nc_goes.variables[goesVar].scale_factor + nc_goes.variables[goesVar].add_offset + goesQC2d = np.array( nc_goes.variables['DQF']).astype(np.uint8) + else: + goesData2d = np.array( nc_goes.variables[goesVar]) + goesQC2d = np.array( nc_goes.variables['DQF']) + + # Make variables 1-d + goesQC = goesQC2d.flatten() + goesData = goesData2d.flatten() + nc_goes.close() + + # Get rid of NaNs; base it on longitude + goesData = goesData[~np.isnan(goesLon)] # Handle data arrays first before changing lat/lon itself + goesQC = goesQC[~np.isnan(goesLon)] + goesLon = goesLon[~np.isnan(goesLon)] # ~ is "logical not", also np.logical_not + goesLat = goesLat[~np.isnan(goesLat)] + if goesLon.shape != goesLat.shape: + print('GOES lat/lon shape mismatch') + sys.exit() + + # If goesQC == 0, good QC and there was a cloud with a valid pressure. + # If goesQC == 4, no cloud; probably clear sky. + # All other QC means no data, and we want to remove those points + idx = np.logical_or( goesQC == 0, goesQC == 4) # Only keep QC == 0 or 4 + goesData = goesData[idx] + goesQC = goesQC[idx] + goesLon = goesLon[idx] + goesLat = goesLat[idx] + + # Only QC with 0 or 4 are left; now set QC == 4 to missing to indicate clear sky + goesData = np.where( goesQC != 0, missing_values, goesData) + + # Get longitude to between (0,360) for consistency with JEDI files (this check is applied to JEDI files, too) + goesLon = np.where( goesLon < 0, goesLon + 360.0, goesLon ) + + print('Min GOES Lon = ',np.min(goesLon)) + print('Max GOES Lon = ',np.max(goesLon)) + + return goesLon, goesLat, goesData + +def point2point(source,inputDir,satellite,channel,goesFile,condition,layerDefinitions,dataSource): + + # Static Variables for QC and obs + qcVar = 'brightness_temperature_'+str(channel)+'@EffectiveQC' #'@EffectiveQC0' # QC variable + obsVar = 'brightness_temperature_'+str(channel)+'@ObsValue' # Observation variable + + # Get GOES-16 retrieval file with auxiliary information + if 'abi' in satellite or 'ahi' in satellite: + goesLon, goesLat, goesData = getGOESRetrivalData(goesFile,'PRES') # return 1-d arrays + lonlatGOES = np.array( list(zip(goesLon, goesLat))) # lon/lat pairs for each GOES ob (nobs_GOES, 2) + #print('shape lonlatGOES = ',lonlatGOES.shape) + print('getting data from ',goesFile) + myGOESInterpolator = NearestNDInterpolator(lonlatGOES,goesData) + + # First check to see if there's a concatenated file with all obs. + # If so, use that. If not, have to process one file per processor, which takes a lot more time + if os.path.exists(inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'): + inputFiles = [inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'] # needs to be in a list since we loop over inputFiles + else: + # Get list of OMB files to process. There is one file per processor. + # Need to get them in order so they are called in the same order for the + # forecast and observed passes through this subroutine. 
+ files = os.listdir(inputDir) + inputFiles = fnmatch.filter(files,'obsout*_'+satellite+'*nc4') # returns relative path names + inputFiles = [inputDir+'/'+s for s in inputFiles] # add on directory name + inputFiles.sort() # Get in order from low to high + if len(inputFiles) == 0: return -99999, -99999 # if no matching files, force a failure + + # Variable to pull for brightness temperature +# if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@GsiHofXBc' # Forecast variable + if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@hofx' #'@depbg' # OMB + if dataSource == 2: v = obsVar + + # Read the files and put data in array + allData, allDataQC = [], [] + for inputFile in inputFiles: + nc_fid = Dataset(inputFile, "r", format="NETCDF4") #Dataset is the class behavior to open the file + print('Trying to read ',v,' from ',inputFile) + + # Read forecast/obs data + read_var = nc_fid.variables[v] # extract/copy the data + # read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + this_var = np.array( read_var ) # to numpy array + # this_var = np.where( this_var==read_missing, np.nan, this_var ) + + #if dataSource == 1: # If true, we just read in OMB data, but we want B + # obsData = np.array( nc_fid.variables[obsVar]) + # this_var = obsData - this_var # get background/forecast value (O - OMB = B) + + #Read QC data + qcData = np.array(nc_fid.variables[qcVar]) + + # Sanity check...shapes should match + if qcData.shape != this_var.shape: return -99999, -99999 + + if 'abi' in satellite or 'ahi' in satellite: + + # Get the GOES-16 retrieval data at the observation locations in this file + # GOES values < 0 mean clear sky + lats = np.array(nc_fid.variables['latitude@MetaData']) + lons = np.array(nc_fid.variables['longitude@MetaData']) + + # Get longitude to between (0,360) for consistency with GOES-16 files + lons = np.where( lons < 0, lons + 360.0, lons ) + + lonlat = np.array( list(zip(lons,lats))) # lon/lat pairs for each ob (nobs, 2) + thisGOESData = myGOESInterpolator(lonlat) # GOES data at obs locations in this file. If pressure, units are hPa + thisGOESData = thisGOESData * 100.0 # get into Pa + + #obsCldfra = np.array( nc_fid.variables['cloud_area_fraction@MetaData'] )*100.0 # Get into %...observed cloud fraction (AHI/ABI only) + + geoValsFile = inputFile.replace('obsout','geoval') + if not os.path.exists(geoValsFile): + print(geoValsFile+' not there. exit') + sys.exit() + + nc_fid2 = Dataset(geoValsFile, "r", format="NETCDF4") + fcstCldfra = np.array( nc_fid2.variables['cloud_area_fraction_in_atmosphere_layer'])*100.0 # Get into % + pressure = np.array( nc_fid2.variables['air_pressure']) # Pa + pressure_edges = np.array( nc_fid2.variables['air_pressure_levels']) # Pa + psfc = pressure_edges[:,-1] # Surface pressure (Pa)...array order is top down + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc # these are arrays + PTOP_MID = 0.45*psfc + PTOP_HIGH = PTOP_HIGH_UPP * np.ones_like(psfc) + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP # these are constants + PTOP_MID = PTOP_MID_UPP + PTOP_HIGH = PTOP_HIGH_UPP + else: + print('layerDefinitions = ',layerDefinitions,'is invalid. exit') + sys.exit() + fcstLow,fcstMid,fcstHigh,fcstTotCldFra = getFcstCloudFrac(fcstCldfra,pressure,psfc,layerDefinitions) # get low/mid/high/total forecast cloud fractions for each ob + nc_fid2.close() + + # Modify QC data based on correspondence between forecast and obs. 
qcData used to select good data later + # It's possible that there are multiple forecast layers, such that fcstLow,fcstMid,fcstHigh are all > $cldfraThresh + # However, GOES-16 CTP doesn't really account for layering. So, we need to remove layered clouds from the forecast, + # focusing only on the layers that we asked for when doing {low,mid,high}Only conditions + # The "|" is symbol for "np.logcal_or" + yes = 2.0 + no = 0.0 + cldfraThresh = 20.0 # percent + if qcData.shape == fcstTotCldFra.shape == thisGOESData.shape: # these should all match + print('Using condition ',condition,'for ABI/AHI') + + # Note that "&" is "np.logical_and" for boolean (true/false) quantities. + # Thus, each condition should be enclosed in parentheses + if condition.lower().strip() == 'clearOnly'.lower(): # clear in both forecast and obs + qcData = np.where( (fcstTotCldFra < cldfraThresh) & (thisGOESData <= 0.0), qcData, missing_values) + elif condition.lower().strip() == 'cloudyOnly'.lower(): # cloudy in both forecast and obs + qcData = np.where( (fcstTotCldFra >= cldfraThresh) & (thisGOESData > 0.0), qcData, missing_values) + elif condition.lower().strip() == 'lowOnly'.lower(): # low clouds in both forecast and obs + fcstLow = np.where( (fcstMid >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstLow) # remove mid, high + qcData = np.where( (fcstLow >= cldfraThresh) & ( thisGOESData >= PTOP_LOW), qcData, missing_values) + elif condition.lower().strip() == 'midOnly'.lower(): # mid clouds in both forecast and obs + fcstMid = np.where( (fcstLow >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstMid) # remove low, high + qcData = np.where( (fcstMid >= cldfraThresh) & (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), qcData, missing_values) + elif condition.lower().strip() == 'highOnly'.lower(): # high clouds in both forecast and obs + fcstHigh = np.where( (fcstLow >= cldfraThresh) | ( fcstMid >= cldfraThresh), missing_values, fcstHigh) # remove mid, high + qcData = np.where( (fcstHigh >= cldfraThresh) & (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), qcData, missing_values) + elif condition.lower().strip() == 'fcstLow'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstLow >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'fcstMid'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstMid >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'fcstHigh'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstHigh >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'cloudEventLow'.lower(): + if dataSource == 1: this_var = np.where( fcstLow >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( thisGOESData >= PTOP_LOW, yes, no ) + elif condition.lower().strip() == 'cloudEventMid'.lower(): + if dataSource == 1: this_var = np.where( fcstMid >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), yes, no ) + elif condition.lower().strip() == 'cloudEventHigh'.lower(): + if dataSource == 1: this_var = np.where( fcstHigh >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: 
this_var = np.where( (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), yes, no ) + elif condition.lower().strip() == 'cloudEventTot'.lower(): + if dataSource == 1: this_var = np.where( fcstTotCldFra >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( thisGOESData > 0.0, yes, no ) + elif condition.lower().strip() == 'all': + print("not doing any conditional verification or stratifying by event") + else: + print("condition = ",condition," not recognized.") + sys.exit() + #elif condition.lower().strip() == '4x4table'.lower(): + #if dataSource == 1: + # this_var = np.where( fcstLow >= cldfraThresh, yesLow, no ) + # this_var = this_var + np.where( fcstMid >= cldfraThresh, yesMid, no ) + # this_var = this_var + np.where( fcstHigh >= cldfraThresh, yesHigh, no ) + print('number removed = ', (qcData==missing_values).sum()) + #print('number passed = ', qcData.shape[0] - (qcData==missing_values).sum()) + else: + print('shape mismatch') + return -99999, -99999 + + # Append to arrays + allData.append(this_var) + allDataQC.append(qcData) + + nc_fid.close() # done with the file, so close it before going to next file in loop + + # We're now all done looping over the individul files + + # Get the indices with acceptable QC + allQC = np.concatenate(allDataQC) # Put list of numpy arrays into a single long 1-D numpy array. All QC data. + idx = np.where(allQC==0) # returns indices + + # Now get all the forecast/observed brightness temperature data with acceptable QC + this_var = np.concatenate(allData)[idx] # Put list of numpy arrays into a single long 1-D numpy array. This is all the forecast/obs data with good QC + numObs = this_var.shape[0] # number of points with good QC for this channel + print('Number of obs :',numObs) + + # Assume all the points actually fit into a square grid. Get the side of the square (use ceil to round up) + if numObs > 0: + l = np.ceil(np.sqrt(numObs)).astype('int') # Length of the side of the square + + # Make an array that can be reshaped into the square + raw_data1D = np.full(l*l,np.nan) # Initialize 1D array of length l**2 to np.nan + raw_data1D[0:numObs] = this_var[:] # Fill data to the extent possible. 
There will be some np.nan values at the end + raw_data = np.reshape(raw_data1D,(l,l)) # Reshape into "square grid" + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + met_data=raw_data.astype(float) # Give MET this info + + # Now need to tell MET the "grid" for the data + # Make a fake lat/lon grid going from 0.0 to 50.0 degrees, with the interval determined by number of points + griddedDatasets[source]['latDef'][0] = 0.0 # starting point + griddedDatasets[source]['latDef'][1] = np.diff(np.linspace(0,50,l)).round(6)[0] # interval (degrees) + griddedDatasets[source]['latDef'][2] = int(l) # number of points + griddedDatasets[source]['lonDef'][0:3] = griddedDatasets[source]['latDef'] + + gridInfo = getGridInfo(source, griddedDatasets[source]['gridType']) # 'LatLon' gridType + return met_data, gridInfo + + else: + return -99999, -99999 + +########### +def getGridInfo(source,gridType): + + if gridType == 'LatLon': + latDef = griddedDatasets[source]['latDef'] + lonDef = griddedDatasets[source]['lonDef'] + gridInfo = { + 'type': gridType, + 'name': source, + 'lat_ll': latDef[0], #-90.000, + 'lon_ll': lonDef[0], #-180.000, + 'delta_lat': latDef[1], #0.5000, + 'delta_lon': lonDef[1], #0.625, + 'Nlat': latDef[2], #361, + 'Nlon': lonDef[2], #576, + } + elif gridType == 'Gaussian': + gridInfo = { + 'type': gridType, + 'name': source, + 'nx': griddedDatasets[source]['nx'], + 'ny': griddedDatasets[source]['ny'], + 'lon_zero': griddedDatasets[source]['lon_zero'], + } + + return gridInfo + +def getAttrArray(source,variable,initTime,validTime): + + init = dt.datetime.strptime(initTime,"%Y%m%d%H") + valid = dt.datetime.strptime(validTime,"%Y%m%d%H") + lead, rem = divmod((valid-init).total_seconds(), 3600) + + attrs = { + + 'valid': valid.strftime("%Y%m%d_%H%M%S"), + 'init': init.strftime("%Y%m%d_%H%M%S"), + 'lead': str(int(lead)), + 'accum': '000000', + + 'name': variable, #'MERRA2_Cloud_Percentage' + 'long_name': variable, #'Cloud Percentage Levels', + 'level': 'ALL', + 'units': verifVariables[variable]['units'], + + 'grid': getGridInfo(source,griddedDatasets[source]['gridType']) + } + + #print(attrs) + #print(griddedDatasets[source]) + + return attrs + +######## END FUNCTIONS ########## + + +#if __name__ == "__main__": +dataFile, dataSource, variable, i_date, v_date, flag = sys.argv[1].split(":") +met_data = getDataArray(dataFile,dataSource,variable,flag) +attrs = getAttrArray(dataSource,variable,i_date,v_date) +print(attrs) From a45229ef5144b7a15e0f6038280f3fca1608b81f Mon Sep 17 00:00:00 2001 From: j-opatz Date: Tue, 30 May 2023 21:02:39 -0600 Subject: [PATCH 2/6] added documentation files, 3 new cases --- .github/parm/use_case_groups.json | 2 +- ..._fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py | 137 +++ ...cstGFS_obsSATCORPS_cloudTopPressAndTemp.py | 136 +++ ...fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py | 136 +++ ...stMPAS_obsSATCORPS_lowAndTotalCloudFrac.py | 60 +- internal/tests/use_cases/all_use_cases.txt | 4 + ...cstGFS_obsMERRA2_lowAndTotalCloudFrac.conf | 205 ++++ .../read_input_data.py | 911 ++++++++++++++++++ ...tGFS_obsSATCORPS_cloudTopPressAndTemp.conf | 171 ++++ .../read_input_data.py | 911 ++++++++++++++++++ ...stMPAS_obsMERRA2_lowAndTotalCloudFrac.conf | 202 ++++ .../read_input_data.py | 911 ++++++++++++++++++ 12 files changed, 3766 insertions(+), 20 deletions(-) create mode 100644 docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py create mode 100644 
docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py create mode 100644 docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py create mode 100644 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf create mode 100755 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py create mode 100644 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.conf create mode 100755 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp/read_input_data.py create mode 100644 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf create mode 100755 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json index 0567a7db3e..bc0c8b4ec9 100644 --- a/.github/parm/use_case_groups.json +++ b/.github/parm/use_case_groups.json @@ -11,7 +11,7 @@ }, { "category": "air_quality_and_comp", - "index_list": "0", + "index_list": "0-5", "run": false }, { diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py new file mode 100644 index 0000000000..b11f018c1b --- /dev/null +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py @@ -0,0 +1,137 @@ +""" +GridStat: Cloud Fractions with Neighborhood and Probabilities +============================================================= + +model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf + +""" +############################################################################## +# Scientific Objective +# -------------------- +# +# This use case captures various statistical measures of two model comparisons +# for low and total cloud fraction with different neighborhood and probability +# settings for internal model metrics and to aid in future model updates +# + +############################################################################## +# Datasets +# -------- +# +# | **Forecast:** Global Forecast System (GFS) +# | **Observations:** Modern-Era Retrospective analysis for Research and Applications, Version 2 (MERRA2) +# | **Grid:** GPP 17km masking region +# +# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | This tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See 'Running METplus' section for more information. +# + +############################################################################## +# METplus Components +# ------------------ +# +# This use case utilizes Python Embedding, which is called using the PYTHON_NUMPY keyword +# in the observation input template settings. The same Python script processes both forecast and +# observation datasets, but only the observation dataset is not set up for native ingest by MET. 
+# Two separate forecast fields are verified against two respective observation fields, +# with the Python script being passed the input file, the model name, the variable name being analyzed, +# the initialization and valid times, and a flag to indicate if the field passed is observation or forecast. +# This process is repeated with 3 instance names to GridStat, each with a different setting for regridding, +# neighborhood evaluation, thresholding, output line types, and output prefix names. + +############################################################################## +# METplus Workflow +# ---------------- +# +# GridStat is the only MET tool called in this example. +# It processes the following run time: +# +# | **Init:** 2021-07-03 12Z +# | **Forecast lead:** 36 hour +# | +# Because instance names are used, GridStat will run 3 times for this 1 initalization time. + +############################################################################## +# METplus Configuration +# --------------------- +# +# METplus first loads the default configuration file found in parm/metplus_config, +# then it loads any configuration files passed to METplus via the command line: +# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf + +############################################################################## +# MET Configuration +# ----------------- +# +# METplus sets environment variables based on user settings in the METplus configuration file. +# See :ref:`How METplus controls MET config file settings` for more details. +# +# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!** +# +# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to: +# :ref:`Overriding Unsupported MET config file settings` +# +# .. note:: See the :ref:`GridStat MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/met_config/GridStatConfig_wrapped + +############################################################################## +# Python Embedding +# ---------------- +# +# This use case utilizes 1 Python script to read and process both forecast and +# observation fields. +# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py + +############################################################################## +# Running METplus +# --------------- +# +# Pass the use case configuration file to the run_metplus.py script +# along with any user-specific system configuration files if desired:: +# +# run_metplus.py /path/to/METplus/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf /path/to/user_system.conf +# +# See :ref:`running-metplus` for more information. 
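+#
+# As a quick illustration of the colon-separated string described in the Python
+# Embedding section above, read_input_data.py unpacks the single argument that
+# METplus builds for it in the order file, source, variable, init time, valid time,
+# and obs/fcst flag (the file path below is illustrative only, not part of the
+# sample data; the flag value 2 marks observation data in the script)::
+#
+#   arg = "/path/to/MERRA2_401.tavg1_2d_rad_Nx.20210705.nc4:MERRA2:totalCloudFrac:2021070312:2021070500:2"
+#
+#   # same unpacking order used at the bottom of read_input_data.py
+#   data_file, data_source, variable, init_time, valid_time, flag = arg.split(":")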
+
+##############################################################################
+# Expected Output
+# ---------------
+#
+# A successful run will output the following both to the screen and to the logfile::
+#
+#   INFO: METplus has successfully finished running.
+#
+# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated.
+# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac
+# (relative to **OUTPUT_BASE**)
+# and will contain the following files:
+#
+# * grid_stat_GFS_to_MERRA2_F36_CloudFracs_360000L_20210705_000000V_pairs.nc
+# * grid_stat_GFS_to_MERRA2_F36_CloudFracs_360000L_20210705_000000V.stat
+# * grid_stat_GFS_to_MERRA2_F36_CloudFracs_NBR_360000L_20210705_000000V_pairs.nc
+# * grid_stat_GFS_to_MERRA2_F36_CloudFracs_NBR_360000L_20210705_000000V.stat
+# * grid_stat_GFS_to_MERRA2_F36_CloudFracs_PROB_360000L_20210705_000000V_pairs.nc
+# * grid_stat_GFS_to_MERRA2_F36_CloudFracs_PROB_360000L_20210705_000000V.stat
+
+##############################################################################
+# Keywords
+# --------
+#
+# .. note::
+#
+#   * GridStatToolUseCase
+#   * NetCDFFileUseCase
+#   * AirQualityAndCompAppUseCase
+#   * PythonEmbeddingFileUseCase
+#
+#   Navigate to the :ref:`quick-search` page to discover other similar use cases.
+#
+# sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.png'
diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py
new file mode 100644
index 0000000000..a7700e06f9
--- /dev/null
+++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py
@@ -0,0 +1,136 @@
+"""
+GridStat: Cloud Pressure and Temperature Heights
+================================================
+
+model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.conf
+
+"""
+##############################################################################
+# Scientific Objective
+# --------------------
+#
+# This use case captures various statistical measures of two model comparisons
+# for cloud top pressures and temperatures with different neighborhood
+# settings for internal model metrics and to aid in future model updates
+#
+
+##############################################################################
+# Datasets
+# --------
+#
+# | **Forecast:** Global Forecast System (GFS)
+# | **Observations:** Satellite ClOud and Radiation Property retrieval System (SatCORPS)
+# | **Grid:** GPP 17km masking region
+#
+# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases
+# | This tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See 'Running METplus' section for more information.
+#
+
+##############################################################################
+# METplus Components
+# ------------------
+#
+# This use case utilizes Python Embedding, which is called using the PYTHON_NUMPY keyword
+# in the observation input template settings. The same Python script can process both forecast and
+# observation datasets, but only the observation dataset requires Python Embedding because it is not
+# set up for native ingest by MET. Two separate forecast fields are verified against two respective observation fields,
+# with the Python script being passed the input file, the model name, the variable name being analyzed,
+# the initialization and valid times, and a flag to indicate if the field passed is observation or forecast.
+# This process is repeated with 2 instance names to GridStat, each with a different setting for regridding,
+# neighborhood evaluation, thresholding, output line types, and output prefix names.
+
+##############################################################################
+# METplus Workflow
+# ----------------
+#
+# GridStat is the only MET tool called in this example.
+# It processes the following run time:
+#
+# | **Init:** 2022-07-03 12Z
+# | **Forecast lead:** 36 hour
+# |
+# Because instance names are used, GridStat will run 2 times for this 1 initialization time.
+
+##############################################################################
+# METplus Configuration
+# ---------------------
+#
+# METplus first loads the default configuration file found in parm/metplus_config,
+# then it loads any configuration files passed to METplus via the command line:
+# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.conf
+#
+# .. highlight:: bash
+# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.conf
+
+##############################################################################
+# MET Configuration
+# -----------------
+#
+# METplus sets environment variables based on user settings in the METplus configuration file.
+# See :ref:`How METplus controls MET config file settings` for more details.
+#
+# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!**
+#
+# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to:
+# :ref:`Overriding Unsupported MET config file settings`
+#
+# .. note:: See the :ref:`GridStat MET Configuration` section of the User's Guide for more information on the environment variables used in the file below:
+#
+# .. highlight:: bash
+# .. literalinclude:: ../../../../parm/met_config/GridStatConfig_wrapped
+
+##############################################################################
+# Python Embedding
+# ----------------
+#
+# This use case utilizes 1 Python script to read and process the observation fields.
+# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp/read_input_data.py
+#
+# .. highlight:: bash
+# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp/read_input_data.py
+
+##############################################################################
+# Running METplus
+# ---------------
+#
+# Pass the use case configuration file to the run_metplus.py script
+# along with any user-specific system configuration files if desired::
+#
+#   run_metplus.py /path/to/METplus/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.conf /path/to/user_system.conf
+#
+# See :ref:`running-metplus` for more information.
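+#
+# For reference, MET's Python Embedding reads two variables from the script's
+# namespace: ``met_data`` (a 2-D float array with -9999. for missing data) and
+# ``attrs`` (timing and grid metadata), which read_input_data.py builds for the
+# SatCORPS fields. A minimal, self-contained sketch with placeholder values
+# (the grid numbers below are toy values, not the real SatCORPS grid)::
+#
+#   import numpy as np
+#
+#   met_data = np.full((10, 10), -9999.0)   # toy field, all points missing
+#   met_data[2:5, 2:5] = 300.0              # a few valid cloud top pressure values (hPa)
+#
+#   attrs = {
+#       'valid': '20220705_000000', 'init': '20220703_120000',
+#       'lead': '36', 'accum': '000000',
+#       'name': 'cloudTopPres', 'long_name': 'cloudTopPres',
+#       'level': 'ALL', 'units': 'hPa',
+#       'grid': {'type': 'LatLon', 'name': 'SATCORPS',
+#                'lat_ll': 0.0, 'lon_ll': 0.0,
+#                'delta_lat': 0.25, 'delta_lon': 0.25,
+#                'Nlat': 10, 'Nlon': 10},
+#   }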
+
+##############################################################################
+# Expected Output
+# ---------------
+#
+# A successful run will output the following both to the screen and to the logfile::
+#
+#   INFO: METplus has successfully finished running.
+#
+# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated.
+# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp
+# (relative to **OUTPUT_BASE**)
+# and will contain the following files:
+#
+# * grid_stat_GFS_to_SATCORPS_F36_CloudHgts_360000L_20220705_000000V_pairs.nc
+# * grid_stat_GFS_to_SATCORPS_F36_CloudHgts_360000L_20220705_000000V_ctc.txt
+# * grid_stat_GFS_to_SATCORPS_F36_CloudHgts_360000L_20220705_000000V_cts.txt
+# * grid_stat_GFS_to_SATCORPS_F36_CloudHgts_360000L_20220705_000000V.stat
+# * grid_stat_GFS_to_SATCORPS_F36_CloudHgts_NBR_360000L_20220705_000000V_pairs.nc
+# * grid_stat_GFS_to_SATCORPS_F36_CloudHgts_NBR_360000L_20220705_000000V.stat
+
+##############################################################################
+# Keywords
+# --------
+#
+# .. note::
+#
+#   * GridStatToolUseCase
+#   * NetCDFFileUseCase
+#   * AirQualityAndCompAppUseCase
+#   * PythonEmbeddingFileUseCase
+#
+#   Navigate to the :ref:`quick-search` page to discover other similar use cases.
+#
+# sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.png'
diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py
new file mode 100644
index 0000000000..395d37c3f6
--- /dev/null
+++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py
@@ -0,0 +1,136 @@
+"""
+GridStat: Cloud Fractions with Neighborhood and Probabilities
+=============================================================
+
+model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf
+
+"""
+##############################################################################
+# Scientific Objective
+# --------------------
+#
+# This use case captures various statistical measures of two model comparisons
+# for low and total cloud fraction with different neighborhood and probability
+# settings for internal model metrics and to aid in future model updates
+#
+
+##############################################################################
+# Datasets
+# --------
+#
+# | **Forecast:** Model for Prediction Across Scales (MPAS)
+# | **Observations:** Modern-Era Retrospective analysis for Research and Applications, Version 2 (MERRA2)
+# | **Grid:** GPP 17km masking region
+#
+# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases
+# | This tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See 'Running METplus' section for more information.
+#
+
+##############################################################################
+# METplus Components
+# ------------------
+#
+# This use case utilizes Python Embedding, which is called using the PYTHON_NUMPY keyword
+# in the forecast and observation input template settings.
The same Python script processes both forecast and +# observation datasets. Two separate forecast fields are verified against two respective observation fields, +# with the Python script being passed the input file, the model name, the variable name being analyzed, +# the initialization and valid times, and a flag to indicate if the field passed is observation or forecast. +# This process is repeated with 3 instance names to GridStat, each with a different setting for regridding, +# neighborhood evaluation, thresholding, output line types, and output prefix names. + +############################################################################## +# METplus Workflow +# ---------------- +# +# GridStat is the only MET tool called in this example. +# It processes the following run time: +# +# | **Init:** 2020-07-23 00Z +# | **Forecast lead:** 36 hour +# | +# Because instance names are used, GridStat will run 3 times for this 1 initalization time. + +############################################################################## +# METplus Configuration +# --------------------- +# +# METplus first loads the default configuration file found in parm/metplus_config, +# then it loads any configuration files passed to METplus via the command line: +# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf + +############################################################################## +# MET Configuration +# ----------------- +# +# METplus sets environment variables based on user settings in the METplus configuration file. +# See :ref:`How METplus controls MET config file settings` for more details. +# +# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!** +# +# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to: +# :ref:`Overriding Unsupported MET config file settings` +# +# .. note:: See the :ref:`GridStat MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/met_config/GridStatConfig_wrapped + +############################################################################## +# Python Embedding +# ---------------- +# +# This use case utilizes 1 Python script to read and process both forecast and +# observation fields. +# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py + +############################################################################## +# Running METplus +# --------------- +# +# Pass the use case configuration file to the run_metplus.py script +# along with any user-specific system configuration files if desired:: +# +# run_metplus.py /path/to/METplus/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf /path/to/user_system.conf +# +# See :ref:`running-metplus` for more information. 
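+#
+# read_input_data.py also contains a helper (getFcstCloudFrac) that bins model
+# cloud fraction into low/mid/high layers by pressure and applies a minimum
+# overlap assumption. A small sketch of that logic with made-up numbers, using
+# the UPP layer bounds defined in the script::
+#
+#   import numpy as np
+#
+#   PTOP_LOW, PTOP_MID, PTOP_HIGH = 64200.0, 35000.0, 15000.0   # Pa
+#
+#   pmid = np.array([95000.0, 80000.0, 55000.0, 40000.0, 30000.0, 20000.0])  # layer pressures (Pa)
+#   cfr  = np.array([   10.0,    40.0,     0.0,    25.0,    60.0,     5.0])  # cloud fraction (%)
+#
+#   low   = cfr[pmid >= PTOP_LOW].max()                         # 40.0
+#   mid   = cfr[(pmid < PTOP_LOW) & (pmid >= PTOP_MID)].max()   # 25.0
+#   high  = cfr[(pmid < PTOP_MID) & (pmid >= PTOP_HIGH)].max()  # 60.0
+#   total = max(low, mid, high)                                 # minimum overlap: 60.0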
+
+##############################################################################
+# Expected Output
+# ---------------
+#
+# A successful run will output the following both to the screen and to the logfile::
+#
+#   INFO: METplus has successfully finished running.
+#
+# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated.
+# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac
+# (relative to **OUTPUT_BASE**)
+# and will contain the following files:
+#
+# * grid_stat_MPAS_to_MERRA2_F36_CloudFracs_360000L_20200724_120000V_pairs.nc
+# * grid_stat_MPAS_to_MERRA2_F36_CloudFracs_360000L_20200724_120000V.stat
+# * grid_stat_MPAS_to_MERRA2_F36_CloudFracs_NBR_360000L_20200724_120000V_pairs.nc
+# * grid_stat_MPAS_to_MERRA2_F36_CloudFracs_NBR_360000L_20200724_120000V.stat
+# * grid_stat_MPAS_to_MERRA2_F36_CloudFracs_PROB_360000L_20200724_120000V_pairs.nc
+# * grid_stat_MPAS_to_MERRA2_F36_CloudFracs_PROB_360000L_20200724_120000V.stat
+
+##############################################################################
+# Keywords
+# --------
+#
+# .. note::
+#
+#   * GridStatToolUseCase
+#   * NetCDFFileUseCase
+#   * AirQualityAndCompAppUseCase
+#   * PythonEmbeddingFileUseCase
+#
+#   Navigate to the :ref:`quick-search` page to discover other similar use cases.
+#
+# sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.png'
diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py
index 1e26265475..a6905bb2bd 100644
--- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py
+++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py
@@ -1,6 +1,6 @@
 """
-GridStat: Cloud Fractions with Various Settings
-===============================================
+GridStat: Cloud Fractions with Neighborhood and Probabilities
+=============================================================
 
 model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf
 
@@ -30,19 +30,25 @@
 # METplus Components
 # ------------------
 #
-# This use case utilizes the METplus PlotPointObs wrapper to generate a
-# command to run the MET tool plot_point_obs if all required files are found.
+# This use case utilizes Python Embedding, which is called using the PYTHON_NUMPY keyword
+# in the forecast and observation input template settings. The same Python script processes both forecast and
+# observation datasets. Two separate forecast fields are verified against two respective observation fields,
+# with the Python script being passed the input file, the model name, the variable name being analyzed,
+# the initialization and valid times, and a flag to indicate if the field passed is observation or forecast.
+# This process is repeated with 3 instance names to GridStat, each with a different setting for regridding,
+# neighborhood evaluation, thresholding, output line types, and output prefix names.
 
 ##############################################################################
 # METplus Workflow
 # ----------------
 #
-# PlotPointObs is the only tool called in this example.
+# GridStat is the only MET tool called in this example.
# It processes the following run time: # -# | **Valid:** 2012-04-09 12Z -# | **Forecast lead:** 12 hour +# | **Init:** 2020-07-23 00Z +# | **Forecast lead:** 36 hour # | +# Because instance names are used, GridStat will run 3 times for this 1 initalization time. ############################################################################## # METplus Configuration @@ -50,10 +56,10 @@ # # METplus first loads the default configuration file found in parm/metplus_config, # then it loads any configuration files passed to METplus via the command line: -# parm/use_cases/met_tool_wrapper/PlotPointObs/PlotPointObs.conf +# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf # # .. highlight:: bash -# .. literalinclude:: ../../../../parm/use_cases/met_tool_wrapper/PlotPointObs/PlotPointObs.conf +# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf ############################################################################## # MET Configuration @@ -67,11 +73,21 @@ # If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to: # :ref:`Overriding Unsupported MET config file settings` # -# .. note:: See the :ref:`PlotPointObs MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: +# .. note:: See the :ref:`GridStat MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: # # .. highlight:: bash -# .. literalinclude:: ../../../../parm/met_config/PlotPointObsConfig_wrapped +# .. literalinclude:: ../../../../parm/met_config/GridStatConfig_wrapped +############################################################################## +# Python Embedding +# ---------------- +# +# This use case utilizes 1 Python script to read and process both forecast and +# observation fields. +# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/read_input_data.py +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac/read_input_data.py ############################################################################## # Running METplus @@ -80,7 +96,7 @@ # Pass the use case configuration file to the run_metplus.py script # along with any user-specific system configuration files if desired:: # -# run_metplus.py /path/to/METplus/parm/use_cases/met_tool_wrapper/PlotPointObs/PlotPointObs.conf /path/to/user_system.conf +# run_metplus.py /path/to/METplus/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf /path/to/user_system.conf # # See :ref:`running-metplus` for more information. @@ -93,11 +109,16 @@ # INFO: METplus has successfully finished running. # # Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. 
-# Output for this use case will be found in plot_point_obs +# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac # (relative to **OUTPUT_BASE**) -# and will contain the following file: -# -# * nam_and_ndas.20120409.t12z.prepbufr_CONFIG.ps +# and will contain the following files: +# +# * grid_stat_MPAS_F36_CloudFracs_360000L_20200724_120000V_pairs.nc +# * grid_stat_MPAS_F36_CloudFracs_360000L_20200724_120000V.stat +# * grid_stat_MPAS_F36_CloudFracs_NBR_360000L_20200724_120000V_pairs.nc +# * grid_stat_MPAS_F36_CloudFracs_NBR_360000L_20200724_120000V.stat +# * grid_stat_MPAS_F36_CloudFracs_PROB_360000L_20200724_120000V_pairs.nc +# * grid_stat_MPAS_F36_CloudFracs_PROB_360000L_20200724_120000V.stat ############################################################################## # Keywords @@ -105,10 +126,11 @@ # # .. note:: # -# * PlotPointObsToolUseCase -# * GRIBFileUseCase +# * GridStatToolUseCase # * NetCDFFileUseCase +# * AirQualityAndCompAppUseCase +# * PythonEmbeddingFileUseCase # # Navigate to the :ref:`quick-search` page to discover other similar use cases. # -# sphinx_gallery_thumbnail_path = '_static/met_tool_wrapper-PlotPointObs.png' +# sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.png' diff --git a/internal/tests/use_cases/all_use_cases.txt b/internal/tests/use_cases/all_use_cases.txt index fbd7fa0131..f2148826c5 100644 --- a/internal/tests/use_cases/all_use_cases.txt +++ b/internal/tests/use_cases/all_use_cases.txt @@ -64,6 +64,10 @@ Category: met_tool_wrapper Category: air_quality_and_comp 0::EnsembleStat_fcstICAP_obsMODIS_aod::model_applications/air_quality_and_comp/EnsembleStat_fcstICAP_obsMODIS_aod.conf +1::GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac::model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.conf:: pandac_env, py_embed +2::GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp::model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.conf:: pandac_env, py_embed +3::GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac::model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf:: pandac_env, py_embed +4::GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac::model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf:: pandac_env, py_embed Category: climate diff --git a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf new file mode 100644 index 0000000000..7b8fec3a77 --- /dev/null +++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf @@ -0,0 +1,205 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.html + +# For additional information, please see the METplus Users Guide. 
+# https://metplus.readthedocs.io/en/latest/Users_Guide + +# ### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + + +PROCESS_LIST = GridStat, GridStat(nbr), GridStat(prob) + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +LOOP_BY = INIT +INIT_TIME_FMT = %Y%m%d%H +INIT_BEG=2021070312 +INIT_END=2021070312 +INIT_INCREMENT = 12H + +LEAD_SEQ = 36 + +LOOP_ORDER = times + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +FCST_GRID_STAT_INPUT_DIR = {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac +FCST_GRID_STAT_INPUT_TEMPLATE = {init?fmt=%Y%m%d}.gfs.t12z.pgrb2.0p25.f0{LEAD_SEQ} + +OBS_GRID_STAT_INPUT_DIR = +OBS_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +GRID_STAT_CLIMO_MEAN_INPUT_DIR = +GRID_STAT_CLIMO_MEAN_INPUT_TEMPLATE = + +GRID_STAT_CLIMO_STDEV_INPUT_DIR = +GRID_STAT_CLIMO_STDEV_INPUT_TEMPLATE = + +GRID_STAT_OUTPUT_DIR = {OUTPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac +GRID_STAT_OUTPUT_TEMPLATE = + + +### +# Field Info +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#field-info +### + +MODEL = GFS +OBTYPE = MERRA2 + +CONFIG_DIR = {PARM_BASE}/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac + +FCST_VAR1_NAME = TCDC +FCST_VAR1_LEVELS = R636 +FCST_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +FCST_VAR2_NAME = LCDC +FCST_VAR2_LEVELS = R630 +FCST_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + +OBS_VAR1_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac/MERRA2_401.tavg1_2d_rad_Nx.{valid?fmt=%Y%m%d}.nc4:{OBTYPE}:totalCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR1_LEVELS = +OBS_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +OBS_VAR2_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac/MERRA2_401.tavg1_2d_rad_Nx.{valid?fmt=%Y%m%d}.nc4:{OBTYPE}:lowCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + + +### +# GridStat Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#gridstat +### + +#LOG_GRID_STAT_VERBOSITY = 2 + +GRID_STAT_CONFIG_FILE = {PARM_BASE}/met_config/GridStatConfig_wrapped + +GRID_STAT_REGRID_TO_GRID = FCST +GRID_STAT_REGRID_METHOD = BILIN +GRID_STAT_REGRID_WIDTH = 2 + +GRID_STAT_DESC = + +FCST_GRID_STAT_FILE_WINDOW_BEGIN = 0 +FCST_GRID_STAT_FILE_WINDOW_END = 0 +OBS_GRID_STAT_FILE_WINDOW_BEGIN = 0 +OBS_GRID_STAT_FILE_WINDOW_END = 0 + +GRID_STAT_NEIGHBORHOOD_WIDTH = 1 +GRID_STAT_NEIGHBORHOOD_SHAPE = SQUARE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >=0.5 + +GRID_STAT_ONCE_PER_FIELD = False 
+ +FCST_IS_PROB = false + +FCST_GRID_STAT_PROB_THRESH = ==0.1 + +OBS_IS_PROB = false + +OBS_GRID_STAT_PROB_THRESH = ==0.1 + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudFracs + +GRID_STAT_OUTPUT_FLAG_FHO = STAT +GRID_STAT_OUTPUT_FLAG_CTC = STAT +GRID_STAT_OUTPUT_FLAG_CTS = STAT +GRID_STAT_OUTPUT_FLAG_CNT = STAT +GRID_STAT_OUTPUT_FLAG_SL1L2 = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = STAT + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_CLIMO = FALSE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_MASK_POLY = {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac/GPP_17km_60S_60N_mask.nc + +[nbr] + +FCST_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SFP20, >SFP30, >SFP40, >SFP50, >SFP60, >SFP70, >SFP80 +FCST_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SFP20, >SFP30, >SFP40, >SFP50, >SFP60, >SFP70, >SFP80 + + +OBS_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SOP20, >SOP30, >SOP40, >SOP50, >SOP60, >SOP70, >SOP80 +OBS_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SOP20, >SOP30, >SOP40, >SOP50, >SOP60, >SOP70, >SOP80 + + +GRID_STAT_NEIGHBORHOOD_WIDTH = 3, 5, 7, 9 +GRID_STAT_NEIGHBORHOOD_SHAPE = CIRCLE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >0.0 + +GRID_STAT_OUTPUT_FLAG_FHO = NONE +GRID_STAT_OUTPUT_FLAG_CTC = NONE +GRID_STAT_OUTPUT_FLAG_CTS = NONE +GRID_STAT_OUTPUT_FLAG_CNT = NONE +GRID_STAT_OUTPUT_FLAG_SL1L2 = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTC = STAT +GRID_STAT_OUTPUT_FLAG_NBRCTS = STAT +GRID_STAT_OUTPUT_FLAG_NBRCNT = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = NONE + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_NBRHD = TRUE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudFracs_NBR + +[prob] + +OBS_VAR1_THRESH = gt0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +OBS_VAR2_THRESH = gt0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +FCST_IS_PROB = TRUE + +FCST_VAR1_THRESH = >0.1, >0.2, >0.3, >0.4, >0.5, >0.6, >0.7, >0.8, >0.9, >1.0 +FCST_VAR2_THRESH = >0.1, >0.2, >0.3, >0.4, >0.5, >0.6, >0.7, >0.8, >0.9, >1.0 +GRID_STAT_NEIGHBORHOOD_WIDTH = 3, 5, 7, 9 +GRID_STAT_NEIGHBORHOOD_SHAPE = CIRCLE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >0.0 + +GRID_STAT_OUTPUT_FLAG_FHO = NONE +GRID_STAT_OUTPUT_FLAG_CTC = NONE +GRID_STAT_OUTPUT_FLAG_CTS = NONE +GRID_STAT_OUTPUT_FLAG_CNT = NONE +GRID_STAT_OUTPUT_FLAG_SL1L2 = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTC = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTS = NONE +GRID_STAT_OUTPUT_FLAG_NBRCNT = NONE +GRID_STAT_OUTPUT_FLAG_GRAD = NONE +GRID_STAT_OUTPUT_FLAG_PCT = STAT +GRID_STAT_OUTPUT_FLAG_PSTD = STAT +GRID_STAT_OUTPUT_FLAG_PJC = STAT +GRID_STAT_OUTPUT_FLAG_PRC = STAT + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_NBRHD = FALSE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudFracs_PROB + diff --git 
a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py new file mode 100755 index 0000000000..aa436135a6 --- /dev/null +++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py @@ -0,0 +1,911 @@ +#this code was provided by Craig Schwartz +#and is largely unaltered from its original +#function. + +#from __future__ import print_function +import os +import sys +import numpy as np +import datetime as dt +from netCDF4 import Dataset # http://code.google.com/p/netcdf4-python/ +from scipy.interpolate import NearestNDInterpolator, LinearNDInterpolator +#### for Plotting +import matplotlib.cm as cm +import matplotlib.axes as maxes +import matplotlib.pyplot as plt +from mpl_toolkits.axes_grid1 import make_axes_locatable +#from mpl_toolkits.basemap import Basemap +import fnmatch +import pygrib +import pickle as pk +##### + +########################################### + +missing_values = -9999.0 # for MET + +# UPP top layer bounds (Pa) for cloud layers +PTOP_LOW_UPP = 64200. # low for > 64200 Pa +PTOP_MID_UPP = 35000. # mid between 35000-64200 Pa +PTOP_HIGH_UPP = 15000. # high between 15000-35000 Pa + +# Values for 4 x 4 contingency table +Na, Nb, Nc, Nd = 1, 2, 3, 4 +Ne, Nf, Ng, Nh = 5, 6, 7, 8 +Ni, Nj, Nk, Nl = 9, 10, 11, 12 +Nm, Nn, No, Np = 13, 14, 15, 16 + +# Notes: +# 1) Entry for 'point' is for point-to-point comparison and is all dummy data (except for gridType) that is overwritten by point2point +# 2) ERA5 on NCAR CISL RDA changed at some point. Old is ERA5_2017 (not used anymore), new is ERA5, which we'll use for 2020 data +griddedDatasets = { + 'MERRA2' : { 'gridType':'LatLon', 'latVar':'lat', 'latDef':[-90.0,0.50,361], 'lonVar':'lon', 'lonDef':[-180.0,0.625,576], 'flipY':True, 'ftype':'nc'}, + 'SATCORPS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[-180.0,0.3125,1152], 'flipY':False, 'ftype':'nc' }, + 'ERA5_2017': { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.7848769072,0.281016829130516,640], 'lonVar':'longitude', 'lonDef':[0.0,0.28125,1280], 'flipY':False, 'ftype':'nc' }, + 'ERA5' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'GFS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'GALWEM' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':True, 'ftype':'grib'}, + 'GALWEM17' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.921875,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'grib'}, + 'WWMCA' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'MPAS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc'}, + 'SAT_WWMCA_MEAN' : { 'gridType':'LatLon', 'latVar':'lat','latDef':[-90.0,0.25,721], 'lonVar':'lon', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'point' : { 'gridType':'LatLon', 
'latVar':'latitude','latDef':[-90.0,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'nc'}, +} + #TODO:Correct one, but MET can ingest a Gaussian grid only in Grib2 format (from Randy B.) + #'ERA5' : { 'gridType':'Gaussian', 'nx':1280, 'ny':640, 'lon_zero':0, 'latVar':'latitude', 'lonVar':'longitude', 'flipY':False, }, + +#GALWEM, both 17-km and 0.25-degree +lowCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':3, 'typeOfFirstFixedSurface':10, 'shortName':'lcc' } +midCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':4, 'typeOfFirstFixedSurface':10, 'shortName':'mcc' } +highCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':5, 'typeOfFirstFixedSurface':10, 'shortName':'hcc' } +totalCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':10, 'shortName':'tcc' } +cloudTopHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':12, 'typeOfFirstFixedSurface':3, 'shortName':'cdct' } +cloudBaseHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':11, 'typeOfFirstFixedSurface':2, 'shortName':'cdcb' } + +#GFS +lowCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':214, 'shortName':'tcc' } +midCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':224, 'shortName':'tcc' } +highCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':234, 'shortName':'tcc' } + +#WWMCA +totalCloudFrac_WWMCA = { 'parameterName':71, 'typeOfLevel':'entireAtmosphere', 'level':0 } + +cloudTopHeightLev1_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':1 } +cloudTopHeightLev2_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':2 } +cloudTopHeightLev3_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':3 } +cloudTopHeightLev4_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':4 } +cloudTopHeight_WWMCA = [ cloudTopHeightLev1_WWMCA, cloudTopHeightLev2_WWMCA, cloudTopHeightLev3_WWMCA, cloudTopHeightLev4_WWMCA ] + +cloudBaseHeightLev1_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':1 } +cloudBaseHeightLev2_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':2 } +cloudBaseHeightLev3_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':3 } +cloudBaseHeightLev4_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':4 } +cloudBaseHeight_WWMCA = [ cloudBaseHeightLev1_WWMCA, cloudBaseHeightLev2_WWMCA, cloudBaseHeightLev3_WWMCA, cloudBaseHeightLev4_WWMCA ] + +verifVariablesModel = { + 'binaryCloud' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'totalCloudFrac' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'lowCloudFrac' : {'GFS':[lowCloudFrac_GFS], 'GALWEM17':[lowCloudFrac_GALWEM], 'GALWEM':[lowCloudFrac_GALWEM], 'MPAS':['cldfrac_low_UM']}, + 'midCloudFrac' : {'GFS':[midCloudFrac_GFS], 'GALWEM17':[midCloudFrac_GALWEM], 'GALWEM':[midCloudFrac_GALWEM], 'MPAS':['cldfrac_mid_UM']}, + 'highCloudFrac' : {'GFS':[highCloudFrac_GFS], 'GALWEM17':[highCloudFrac_GALWEM], 'GALWEM':[highCloudFrac_GALWEM], 'MPAS':['cldfrac_high_UM']}, + 'cloudTopHeight' : {'GFS':[''] , 'GALWEM17':[cloudTopHeight_GALWEM], 'GALWEM':[cloudTopHeight_GALWEM], 'MPAS':['cldht_top_UM']}, + 'cloudBaseHeight' : {'GFS':[''] , 'GALWEM17':[cloudBaseHeight_GALWEM], 'GALWEM':[cloudBaseHeight_GALWEM], 'MPAS':['cldht_base_UM']}, 
+} + +cloudFracCatThresholds = '>0, <10.0, >=10.0, >=20.0, >=30.0, >=40.0, >=50.0, >=60.0, >=70.0, >=80.0, >=90.0' # MET format string +brightnessTempThresholds = '<280.0, <275.0, <273.15, <270.0, <265.0, <260.0, <255.0, <250.0, <245.0, <240.0, <235.0, <230.0, <225.0, <220.0, <215.0, <210.0, <=SFP1, <=SFP5, <=SFP10, <=SFP25, <=SFP50, >=SFP50, >=SFP75, >=SFP90, >=SFP95, >=SFP99' +verifVariables = { + 'binaryCloud' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['TCC'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'NA', 'thresholds':'>0.0', 'interpMethod':'nearest' }, + 'totalCloudFrac' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['tcc'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'lowCloudFrac' : { 'MERRA2':['CLDLOW'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['lcc'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'midCloudFrac' : { 'MERRA2':['CLDMID'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['MCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'highCloudFrac' : { 'MERRA2':['CLDHGH'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['HCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'cloudTopTemp' : { 'MERRA2':['CLDTMP'], 'SATCORPS':['cloud_temperature_top_level'], 'ERA5':[''] , 'units':'K', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopPres' : { 'MERRA2':['CLDPRS'], 'SATCORPS':['cloud_pressure_top_level'], 'ERA5':[''] , 'units':'hPa', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopHeight' : { 'MERRA2':[''] , 'SATCORPS':['cloud_height_top_level'], 'ERA5':[''] , 'WWMCA':cloudTopHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudBaseHeight': { 'MERRA2':[''] , 'SATCORPS':['cloud_height_base_level'], 'ERA5':['cbh'], 'WWMCA':cloudBaseHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudCeiling' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'m', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'brightnessTemp' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'K', 'thresholds':brightnessTempThresholds, 'interpMethod':'bilin'}, +} + +# Combine the two dictionaries +# Only reason verifVariablesModel exists is just for space--verifVaribles gets too long if we keep adding more datasets +for key in verifVariablesModel.keys(): + x = verifVariablesModel[key] + for key1 in x.keys(): + verifVariables[key][key1] = x[key1] + +#f = '/glade/u/home/schwartz/cloud_verification/GFS_grib_0.25deg/2018112412/gfs.0p25.2018112412.f006.grib2' +#grbs = pygrib.open(f) +#idx = pygrib.index(f,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') +#model = 'GFS' +#variable = 'totCloudCover' +#x = verifVariablesModel[variable][model] # returns a list, whose ith element is a dictionary +# e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) +#idx(parameterCategory=x[0]['parameterCategory'],parameterNumber=x[0]['parameterNumber'],typeOfFirstFixedSurface=x[0]['typeOfFirstFixedSurface']) + +# to read in an environmental variable +#x = os.getenv('a') # probably type string no matter what + +########### + +def getThreshold(variable): + x = verifVariables[variable]['thresholds'] + print(x) # needed for python 3 to read variable into csh variable + return x + +def getInterpMethod(variable): + x = 
verifVariables[variable]['interpMethod'].upper() + print(x) # needed for python 3 to read variable into csh variable + return x + +def getTotalCloudFrac(source,data): + if source == 'SATCORPS': + # x = data[0][0,:,:,0] * 1.0E-2 # scaling + x = (data[0][0,:,:,1] + data[0][0,:,:,2] + data[0][0,:,:,3])*1.0E-2 # scaling + # y = data[0] + # x = np.sum( y[:,:,:,1:4],axis=3) + elif source == 'MERRA2': +# x = ( data[0][0,:,:]+data[1][0,:,:]+data[2][0,:,:] ) *100.0 # the ith element of data is a numpy array + x = data[0][0,:,:] * 100.0 # the ith element of data is a numpy array + print(x.min(), x.max()) + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + elif source == 'SAT_WWMCA_MEAN': + x = data[0][0,:,:] # already in % + else: + x = data[0] + + # This next line is WRONG. + # Missing should be set to missing + # Then, the non-missing values are 1s and 0s + #output = np.where(x > 0.0, x, 0.0) + #output = np.where(x < 0.0, -9999.0, x) # missing. currently used for SATCORPS + + x = np.where( x < 0.0 , 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + return x + +def getBinaryCloud(source,data): + y = getTotalCloudFrac(source,data) + # keep NaNs as is, but then set everything else to either 100% or 0% + x = np.where( np.isnan(y), y, np.where(y > 0.0, 100.0, 0.0) ) + return x + +def getLayerCloudFrac(source,data,layer): + if source == 'SATCORPS': + if layer.lower().strip() == 'low' : i = 1 + if layer.lower().strip() == 'mid' : i = 2 + if layer.lower().strip() == 'high' : i = 3 + x = data[0][0,:,:,i] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 100.0 + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + else: + x = data[0] + + x = np.where( x < 0.0, 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + + return x + +def getCloudTopTemp(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopPres(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-1 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 1.0E-2 # scaling [Pa] -> [hPa] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud top height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmax(tmp,axis=0) # get maximum cloud top height across all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud top shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def 
getCloudBaseHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud base height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmin(tmp,axis=0) # get lowest cloud base over all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud base shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def getCloudCeiling(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] #TBD + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] # TBD + except: x = data[0][0,:,:] + return x + +# add other functions for different variables + +########### + +def getDataArray(inputFile,source,variable,dataSource): + # 1) inputFile: File name--either observations or forecast + # 2) source: Obsevation source (e.g., MERRA, SATCORP, etc.) + # 3) variable: Variable to verify + # 4) dataSource: If 1, process forecast file. If 2 process obs file. + +# # specifying names here temporarily. file names should be passed in to python from shell script +# if source == 'merra': nc_file = '/gpfs/fs1/scratch/schwartz/MERRA/MERRA2_400.tavg1_2d_rad_Nx.20181101.nc4' +# elif source == 'satcorp': nc_file = '/glade/scratch/bjung/met/test_satcorps/GEO-MRGD.2018334.0000.GRID.NC' +# elif source == 'era5': nc_file = '/glade/scratch/bjung/met/test_era5/e5.oper.fc.sfc.instan.128_164_tcc.regn320sc.2018111606_2018120112.nc' + + source = source.upper().strip() # Force uppercase and get rid of blank spaces, for safety + + print('dataSource = ',dataSource) + + ftype = griddedDatasets[source]['ftype'].lower().strip() + + # Get file handle + if ftype == 'nc': + nc_fid = Dataset(inputFile, "r", format="NETCDF4") + #nc_fid.set_auto_scale(True) + elif ftype == 'grib': + if source == 'WWMCA': + idx = pygrib.index(inputFile,'parameterName','typeOfLevel','level') + else: + idx = pygrib.index(inputFile,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') + + # dataSource == 1 means forecast, 2 means obs +# if dataSource == 1: varsToRead = verifVariablesModel[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list +# if dataSource == 2: varsToRead = verifVariables[variable][source] # returns a list + varsToRead = verifVariables[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list + + print('Trying to read ',inputFile) + + # Get lat/lon information--currently not used + #latVar = griddedDatasets[source]['latVar'] + #lonVar = griddedDatasets[source]['lonVar'] + #lats = np.array(nc_fid.variables[latVar][:]) # extract/copy the data + #lons = np.array(nc_fid.variables[lonVar][:] ) + + #print(lats.max()) + #print(lons.max()) + + # one way to deal with scale factors + # probably using something like nc_fid.set_auto_scale(True) is better... 
+ #latMax = lats.max() + #while latMax > 90.0: + # lons = lons * 0.1 + # lats = lats * 0.1 + # latMax = lats.max() + + # get data + data = [] + for v in varsToRead: + if ftype == 'grib': + if source == 'WWMCA': + x = idx(parameterName=v['parameterName'],typeOfLevel=v['typeOfLevel'],level=v['level'])[0] # by getting element 0, you get a pygrib message + else: + # e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) + if ( variable == 'cloudTopHeight' or variable == 'cloudBaseHeight') and source == 'GALWEM17': + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[1] # by getting element 1, you get a pygrib message + else: + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[0] # by getting element 0, you get a pygrib message + if x.shortName != v['shortName']: print('Name mismatch!') + #ADDED BY JOHN O + print(x) + print('Reading ', x.shortName, 'at level ', x.typeOfFirstFixedSurface) + read_var = x.values # same x.data()[0] + read_missing = x.missingValue + print('missing value = ',read_missing) + + # The missing value (read_missing) for GALWEM17 and GALWEM cloud base/height is 9999, which is not the best choice because + # those could be actual values. So we need to use the masked array part (below) to handle which + # values are missing. We also set read_missing to something unphysical to essentially disable it. + # Finally, if we don't change the 'missingValue' property in the GRIB2 file we are eventually outputting, + # the bitmap will get all messed up, because it will be based on 9999 instead of $missing_values + if variable == 'cloudTopHeight' or variable == 'cloudBaseHeight': + read_missing = -9999. + x['missingValue'] = read_missing + if source == 'GALWEM17': + #These are masked numpy arrays, with mask = True where there is a missing value (no cloud) + #Use np.ma.filled to create an ndarray where mask = True values are set to np.nan + read_var = np.ma.filled(read_var.astype(read_var.dtype), np.nan) + elif ftype == 'nc': + read_var = nc_fid.variables[v] # extract/copy the data + try: + read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + except: + read_missing = -9999. # set a default missing value. probably only need to do this for MPAS + + print('Reading ', v) + + this_var = np.array( read_var ) # to numpy array + #print(read_missing, np.nan) + this_var = np.where( this_var==read_missing, np.nan, this_var ) + #print(this_var.shape) + data.append(this_var) # ith element of the list is a NUMPY ARRAY for the ith variable + #print(type(this_var)) + #print(type(data)) + + # Call a function to get the variable of interest. 
+ # Add a new function for each variable + if variable == 'binaryCloud': raw_data = getBinaryCloud(source,data) + if variable == 'totalCloudFrac': raw_data = getTotalCloudFrac(source,data) + if variable == 'lowCloudFrac': raw_data = getLayerCloudFrac(source,data,'low') + if variable == 'midCloudFrac': raw_data = getLayerCloudFrac(source,data,'mid') + if variable == 'highCloudFrac': raw_data = getLayerCloudFrac(source,data,'high') + if variable == 'cloudTopTemp': raw_data = getCloudTopTemp(source,data) + if variable == 'cloudTopPres': raw_data = getCloudTopPres(source,data) + if variable == 'cloudTopHeight': raw_data = getCloudTopHeight(source,data) + if variable == 'cloudBaseHeight': raw_data = getCloudBaseHeight(source,data) + if variable == 'cloudCeiling': raw_data = getCloudCeiling(source,data) + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + # Array met_data is passed to MET + # Graphics should plot $met_data to make sure things look correct + if griddedDatasets[source]['flipY']: + print('flipping ',source,' data about y-axis') + met_data=np.flip(raw_data,axis=0).astype(float) + else: + met_data=raw_data.astype(float) + + # Make plotting optional or Just use plot_data_plane +# plt_data=np.where(met_data<0, np.nan, met_data) +# map=Basemap(projection='cyl',llcrnrlat=-90,urcrnrlat=90,llcrnrlon=-180,urcrnrlon=180,resolution='c') +# map.drawcoastlines() +# map.drawcountries() +# map.drawparallels(np.arange(-90,90,30),labels=[1,1,0,1]) +# map.drawmeridians(np.arange(0,360,60),labels=[1,1,0,1]) +# plt.contourf(lons,lats,plt_data,20,origin='upper',cmap=cm.Greens) #cm.gist_rainbow) +# title=source+"_"+variable+"_"+str(validTime) +# plt.title(title) +# plt.colorbar(orientation='horizontal') +# plt.savefig(title+".png") + + # If a forecast file, output a GRIB file with + # 1 record containing the met_data + # This is a hack, because right now, MET python embedding doesn't work with pygrib, + # so output the data to a temporary file, and then have MET read the temporary grib file. + # Starting with version 9.0 of MET, the hack isn't needed, and MET python embedding works with pygrib + outputFcstFile = False # MUST be True for MET version < 9.0. 
For MET 9.0+, optional + if dataSource == 1 and ftype == 'grib': + if outputFcstFile: + grbtmp = x + grbtmp['values']=met_data + grbout = open('temp_fcst.grb2','ab') + grbout.write(grbtmp.tostring()) + grbout.close() # Close the outfile GRIB file + print('Successfully output temp_fcst.grb2') + + # Close files + if ftype == 'grib': idx.close() # Close the input GRIB file + if ftype == 'nc': nc_fid.close() # Close the netCDF file + + return met_data + +def obsError(fcstData,obsErrorFile,validDate,dataSource): + + print('Adding noise to the cloud fraction fields') + print('Using obsErrorFile',obsErrorFile) + + # First load the obsError information + #obsErrorFile = 'ob_errors.pk' + infile = open(obsErrorFile,'rb') + binEdges, binStddev = pk.load(infile) # 'numpy.ndarray' types + infile.close() + + # Get 1d forecast data + shape = fcstData.shape + fcst = fcstData.flatten() + + # Set random number seed based on valid time and model + if dataSource.upper().strip() == 'MPAS': ii = 10 + elif dataSource.upper().strip() == 'GALWEM': ii = 20 + elif dataSource.upper().strip() == 'GFS': ii = 30 + np.random.seed(int(validDate*.1 + ii)) + + # Find which bin the data is in + for i in range(0,len(binEdges)-1): + idx = np.where( (fcst >= binEdges[i]) & (fcst < binEdges[i+1]) )[0] + n = len(idx) # number of points in the ith bin + if n > 0: # check for empty bins + randVals = np.random.normal(0,binStddev[i],n) + fcst[idx] = fcst[idx] + randVals + + # bound forecast values to between 0 and 100% + fcst = np.where( fcst < 0.0, 0.0, fcst) + fcst = np.where( fcst > 100.0, 100.0, fcst) + + # now reshape forecast data back to 2D + output = fcst.reshape(shape) + + # data will have NaNs where bad. + return output + +def getFcstCloudFrac(cfr,pmid,psfc,layerDefinitions): # cfr is cloud fraction(%), pmid is 3D pressure(Pa), psfc is surface pressure (Pa) code from UPP ./INITPOST.F + + if pmid.shape != cfr.shape: # sanity check + print('dimension mismatch bewteen cldfra and pressure') + sys.exit() + + nlocs, nlevs = pmid.shape + + if len(psfc) != nlocs: # another sanity check + print('dimension mismatch bewteen cldfra and surface pressure') + sys.exit() + + cfracl = np.zeros(nlocs) + cfracm = np.zeros(nlocs) + cfrach = np.zeros(nlocs) + + for i in range(0,nlocs): + + PTOP_HIGH = PTOP_HIGH_UPP + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc[i] + PTOP_MID = 0.45*psfc[i] + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP + PTOP_MID = PTOP_MID_UPP + + idxLow = np.where( pmid[i,:] >= PTOP_LOW)[0] # using np.where with just 1 argument returns tuple + idxMid = np.where( (pmid[i,:] < PTOP_LOW) & (pmid[i,:] >= PTOP_MID))[0] + idxHigh = np.where( (pmid[i,:] < PTOP_MID) & (pmid[i,:] >= PTOP_HIGH))[0] + + # use conditions in case all indices are missing + if (len(idxLow) >0 ): cfracl[i] = np.max( cfr[i,idxLow] ) + if (len(idxMid) >0 ): cfracm[i] = np.max( cfr[i,idxMid] ) + if (len(idxHigh) >0 ): cfrach[i] = np.max( cfr[i,idxHigh] ) + + tmp = np.vstack( (cfracl,cfracm,cfrach)) # stack the rows into one 2d array + cldfraMax = np.max(tmp,axis=0) # get maximum value across low/mid/high for each pixel (minimum overlap assumption) + + # This is the fortran code put into python format...double loop unnecessary and slow + #for i in range(0,nlocs): + # for k in range(0,nlevs): + # if pmid(i,k) >= PTOP_LOW: + # cfracl(i) = np.max( [cfracl(i),cfr(i,k)] ) # Low + # elif pmid(i,k) < PTOP_LOW and pmid(i,k) >= PTOP_MID: + # cfracm(i) = np.max( [cfracm(i),cfr(i,k)] ) # Mid + # elif pmid(i,k) < PTOP_MID 
and pmid(i,k) >= PTOP_HIGH: # High + # cfrach(i) = np.max( [cfrach(i),cfr(i,k)] ) + + return cfracl, cfracm, cfrach, cldfraMax + +def getGOES16LatLon(g16_data_file): + + # Start timer + startTime = dt.datetime.utcnow() + + # designate dataset + g16nc = Dataset(g16_data_file, 'r') + + # GOES-R projection info and retrieving relevant constants + proj_info = g16nc.variables['goes_imager_projection'] + lon_origin = proj_info.longitude_of_projection_origin + H = proj_info.perspective_point_height+proj_info.semi_major_axis + r_eq = proj_info.semi_major_axis + r_pol = proj_info.semi_minor_axis + + # Data info + lat_rad_1d = g16nc.variables['x'][:] + lon_rad_1d = g16nc.variables['y'][:] + + # close file when finished + g16nc.close() + g16nc = None + + # create meshgrid filled with radian angles + lat_rad,lon_rad = np.meshgrid(lat_rad_1d,lon_rad_1d) + + # lat/lon calc routine from satellite radian angle vectors + + lambda_0 = (lon_origin*np.pi)/180.0 + + a_var = np.power(np.sin(lat_rad),2.0) + (np.power(np.cos(lat_rad),2.0)*(np.power(np.cos(lon_rad),2.0)+(((r_eq*r_eq)/(r_pol*r_pol))*np.power(np.sin(lon_rad),2.0)))) + b_var = -2.0*H*np.cos(lat_rad)*np.cos(lon_rad) + c_var = (H**2.0)-(r_eq**2.0) + + r_s = (-1.0*b_var - np.sqrt((b_var**2)-(4.0*a_var*c_var)))/(2.0*a_var) + + s_x = r_s*np.cos(lat_rad)*np.cos(lon_rad) + s_y = - r_s*np.sin(lat_rad) + s_z = r_s*np.cos(lat_rad)*np.sin(lon_rad) + + lat = (180.0/np.pi)*(np.arctan(((r_eq*r_eq)/(r_pol*r_pol))*((s_z/np.sqrt(((H-s_x)*(H-s_x))+(s_y*s_y)))))) + lon = (lambda_0 - np.arctan(s_y/(H-s_x)))*(180.0/np.pi) + + # End timer + endTime = dt.datetime.utcnow() + time = (endTime - startTime).microseconds / (1000.0*1000.0) + print('took %f4.1 seconds to get GOES16 lat/lon'%(time)) + + return lon,lat # lat/lon are 2-d arrays + +# -- +def getGOESRetrivalData(goesFile,goesVar): + + if not os.path.exists(goesFile): + print(goesFile+' not there. exit') + sys.exit() + + # First get GOES lat/lon + goesLon2d, goesLat2d = getGOES16LatLon(goesFile) # 2-d arrays + goesLon = goesLon2d.flatten() # 1-d arrays + goesLat = goesLat2d.flatten() + + # Now open the file and get the data we want + nc_goes = Dataset(goesFile, "r", format="NETCDF4") + + # If the next line is true (it should be), this indicates the variable needs to be treated + # as an "unsigned 16-bit integer". This is a pain. So we must use the "astype" method + # to change the variable type BEFORE applying scale_factor and add_offset. 
After the conversion + # we then can manually apply the scale factor and offset + #goesVar = 'PRES' + goesVar = goesVar.strip() # for safety + if nc_goes.variables[goesVar]._Unsigned.lower().strip() == 'true': + nc_goes.set_auto_scale(False) # Don't automatically apply scale_factor and add_offset to variable + goesData2d = np.array( nc_goes.variables[goesVar]).astype(np.uint16) + goesData2d = goesData2d * nc_goes.variables[goesVar].scale_factor + nc_goes.variables[goesVar].add_offset + goesQC2d = np.array( nc_goes.variables['DQF']).astype(np.uint8) + else: + goesData2d = np.array( nc_goes.variables[goesVar]) + goesQC2d = np.array( nc_goes.variables['DQF']) + + # Make variables 1-d + goesQC = goesQC2d.flatten() + goesData = goesData2d.flatten() + nc_goes.close() + + # Get rid of NaNs; base it on longitude + goesData = goesData[~np.isnan(goesLon)] # Handle data arrays first before changing lat/lon itself + goesQC = goesQC[~np.isnan(goesLon)] + goesLon = goesLon[~np.isnan(goesLon)] # ~ is "logical not", also np.logical_not + goesLat = goesLat[~np.isnan(goesLat)] + if goesLon.shape != goesLat.shape: + print('GOES lat/lon shape mismatch') + sys.exit() + + # If goesQC == 0, good QC and there was a cloud with a valid pressure. + # If goesQC == 4, no cloud; probably clear sky. + # All other QC means no data, and we want to remove those points + idx = np.logical_or( goesQC == 0, goesQC == 4) # Only keep QC == 0 or 4 + goesData = goesData[idx] + goesQC = goesQC[idx] + goesLon = goesLon[idx] + goesLat = goesLat[idx] + + # Only QC with 0 or 4 are left; now set QC == 4 to missing to indicate clear sky + goesData = np.where( goesQC != 0, missing_values, goesData) + + # Get longitude to between (0,360) for consistency with JEDI files (this check is applied to JEDI files, too) + goesLon = np.where( goesLon < 0, goesLon + 360.0, goesLon ) + + print('Min GOES Lon = ',np.min(goesLon)) + print('Max GOES Lon = ',np.max(goesLon)) + + return goesLon, goesLat, goesData + +def point2point(source,inputDir,satellite,channel,goesFile,condition,layerDefinitions,dataSource): + + # Static Variables for QC and obs + qcVar = 'brightness_temperature_'+str(channel)+'@EffectiveQC' #'@EffectiveQC0' # QC variable + obsVar = 'brightness_temperature_'+str(channel)+'@ObsValue' # Observation variable + + # Get GOES-16 retrieval file with auxiliary information + if 'abi' in satellite or 'ahi' in satellite: + goesLon, goesLat, goesData = getGOESRetrivalData(goesFile,'PRES') # return 1-d arrays + lonlatGOES = np.array( list(zip(goesLon, goesLat))) # lon/lat pairs for each GOES ob (nobs_GOES, 2) + #print('shape lonlatGOES = ',lonlatGOES.shape) + print('getting data from ',goesFile) + myGOESInterpolator = NearestNDInterpolator(lonlatGOES,goesData) + + # First check to see if there's a concatenated file with all obs. + # If so, use that. If not, have to process one file per processor, which takes a lot more time + if os.path.exists(inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'): + inputFiles = [inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'] # needs to be in a list since we loop over inputFiles + else: + # Get list of OMB files to process. There is one file per processor. + # Need to get them in order so they are called in the same order for the + # forecast and observed passes through this subroutine. 
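+ # A roughly equivalent one-liner (a sketch only; it assumes the same obsout*_<satellite>*nc4
+ # naming and is not what the script runs -- the fnmatch code below is):
+ # import glob
+ # inputFiles = sorted( glob.glob(inputDir+'/obsout*_'+satellite+'*nc4') )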
+ files = os.listdir(inputDir) + inputFiles = fnmatch.filter(files,'obsout*_'+satellite+'*nc4') # returns relative path names + inputFiles = [inputDir+'/'+s for s in inputFiles] # add on directory name + inputFiles.sort() # Get in order from low to high + if len(inputFiles) == 0: return -99999, -99999 # if no matching files, force a failure + + # Variable to pull for brightness temperature +# if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@GsiHofXBc' # Forecast variable + if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@hofx' #'@depbg' # OMB + if dataSource == 2: v = obsVar + + # Read the files and put data in array + allData, allDataQC = [], [] + for inputFile in inputFiles: + nc_fid = Dataset(inputFile, "r", format="NETCDF4") #Dataset is the class behavior to open the file + print('Trying to read ',v,' from ',inputFile) + + # Read forecast/obs data + read_var = nc_fid.variables[v] # extract/copy the data + # read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + this_var = np.array( read_var ) # to numpy array + # this_var = np.where( this_var==read_missing, np.nan, this_var ) + + #if dataSource == 1: # If true, we just read in OMB data, but we want B + # obsData = np.array( nc_fid.variables[obsVar]) + # this_var = obsData - this_var # get background/forecast value (O - OMB = B) + + #Read QC data + qcData = np.array(nc_fid.variables[qcVar]) + + # Sanity check...shapes should match + if qcData.shape != this_var.shape: return -99999, -99999 + + if 'abi' in satellite or 'ahi' in satellite: + + # Get the GOES-16 retrieval data at the observation locations in this file + # GOES values < 0 mean clear sky + lats = np.array(nc_fid.variables['latitude@MetaData']) + lons = np.array(nc_fid.variables['longitude@MetaData']) + + # Get longitude to between (0,360) for consistency with GOES-16 files + lons = np.where( lons < 0, lons + 360.0, lons ) + + lonlat = np.array( list(zip(lons,lats))) # lon/lat pairs for each ob (nobs, 2) + thisGOESData = myGOESInterpolator(lonlat) # GOES data at obs locations in this file. If pressure, units are hPa + thisGOESData = thisGOESData * 100.0 # get into Pa + + #obsCldfra = np.array( nc_fid.variables['cloud_area_fraction@MetaData'] )*100.0 # Get into %...observed cloud fraction (AHI/ABI only) + + geoValsFile = inputFile.replace('obsout','geoval') + if not os.path.exists(geoValsFile): + print(geoValsFile+' not there. exit') + sys.exit() + + nc_fid2 = Dataset(geoValsFile, "r", format="NETCDF4") + fcstCldfra = np.array( nc_fid2.variables['cloud_area_fraction_in_atmosphere_layer'])*100.0 # Get into % + pressure = np.array( nc_fid2.variables['air_pressure']) # Pa + pressure_edges = np.array( nc_fid2.variables['air_pressure_levels']) # Pa + psfc = pressure_edges[:,-1] # Surface pressure (Pa)...array order is top down + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc # these are arrays + PTOP_MID = 0.45*psfc + PTOP_HIGH = PTOP_HIGH_UPP * np.ones_like(psfc) + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP # these are constants + PTOP_MID = PTOP_MID_UPP + PTOP_HIGH = PTOP_HIGH_UPP + else: + print('layerDefinitions = ',layerDefinitions,'is invalid. exit') + sys.exit() + fcstLow,fcstMid,fcstHigh,fcstTotCldFra = getFcstCloudFrac(fcstCldfra,pressure,psfc,layerDefinitions) # get low/mid/high/total forecast cloud fractions for each ob + nc_fid2.close() + + # Modify QC data based on correspondence between forecast and obs. 
qcData used to select good data later
+ # It's possible that there are multiple forecast layers, such that fcstLow,fcstMid,fcstHigh are all > $cldfraThresh
+ # However, GOES-16 CTP doesn't really account for layering. So, we need to remove layered clouds from the forecast,
+ # focusing only on the layers that we asked for when doing {low,mid,high}Only conditions
+ # The "|" symbol is "np.logical_or"
+ yes = 2.0
+ no = 0.0
+ cldfraThresh = 20.0 # percent
+ if qcData.shape == fcstTotCldFra.shape == thisGOESData.shape: # these should all match
+ print('Using condition ',condition,'for ABI/AHI')
+
+ # Note that "&" is "np.logical_and" for boolean (true/false) quantities.
+ # Thus, each condition should be enclosed in parentheses
+ if condition.lower().strip() == 'clearOnly'.lower(): # clear in both forecast and obs
+ qcData = np.where( (fcstTotCldFra < cldfraThresh) & (thisGOESData <= 0.0), qcData, missing_values)
+ elif condition.lower().strip() == 'cloudyOnly'.lower(): # cloudy in both forecast and obs
+ qcData = np.where( (fcstTotCldFra >= cldfraThresh) & (thisGOESData > 0.0), qcData, missing_values)
+ elif condition.lower().strip() == 'lowOnly'.lower(): # low clouds in both forecast and obs
+ fcstLow = np.where( (fcstMid >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstLow) # remove mid, high
+ qcData = np.where( (fcstLow >= cldfraThresh) & ( thisGOESData >= PTOP_LOW), qcData, missing_values)
+ elif condition.lower().strip() == 'midOnly'.lower(): # mid clouds in both forecast and obs
+ fcstMid = np.where( (fcstLow >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstMid) # remove low, high
+ qcData = np.where( (fcstMid >= cldfraThresh) & (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), qcData, missing_values)
+ elif condition.lower().strip() == 'highOnly'.lower(): # high clouds in both forecast and obs
+ fcstHigh = np.where( (fcstLow >= cldfraThresh) | ( fcstMid >= cldfraThresh), missing_values, fcstHigh) # remove low, mid
+ qcData = np.where( (fcstHigh >= cldfraThresh) & (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), qcData, missing_values)
+ elif condition.lower().strip() == 'fcstLow'.lower(): # low clouds in forecast (layers possible); obs could be anything
+ qcData = np.where( fcstLow >= cldfraThresh , qcData, missing_values)
+ elif condition.lower().strip() == 'fcstMid'.lower(): # mid clouds in forecast (layers possible); obs could be anything
+ qcData = np.where( fcstMid >= cldfraThresh , qcData, missing_values)
+ elif condition.lower().strip() == 'fcstHigh'.lower(): # high clouds in forecast (layers possible); obs could be anything
+ qcData = np.where( fcstHigh >= cldfraThresh , qcData, missing_values)
+ elif condition.lower().strip() == 'cloudEventLow'.lower():
+ if dataSource == 1: this_var = np.where( fcstLow >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET
+ if dataSource == 2: this_var = np.where( thisGOESData >= PTOP_LOW, yes, no )
+ elif condition.lower().strip() == 'cloudEventMid'.lower():
+ if dataSource == 1: this_var = np.where( fcstMid >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET
+ if dataSource == 2: this_var = np.where( (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), yes, no )
+ elif condition.lower().strip() == 'cloudEventHigh'.lower():
+ if dataSource == 1: this_var = np.where( fcstHigh >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET
+ if dataSource == 2:
this_var = np.where( (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), yes, no )
+ elif condition.lower().strip() == 'cloudEventTot'.lower():
+ if dataSource == 1: this_var = np.where( fcstTotCldFra >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET
+ if dataSource == 2: this_var = np.where( thisGOESData > 0.0, yes, no )
+ elif condition.lower().strip() == 'all':
+ print("not doing any conditional verification or stratifying by event")
+ else:
+ print("condition = ",condition," not recognized.")
+ sys.exit()
+ #elif condition.lower().strip() == '4x4table'.lower():
+ #if dataSource == 1:
+ # this_var = np.where( fcstLow >= cldfraThresh, yesLow, no )
+ # this_var = this_var + np.where( fcstMid >= cldfraThresh, yesMid, no )
+ # this_var = this_var + np.where( fcstHigh >= cldfraThresh, yesHigh, no )
+ print('number removed = ', (qcData==missing_values).sum())
+ #print('number passed = ', qcData.shape[0] - (qcData==missing_values).sum())
+ else:
+ print('shape mismatch')
+ return -99999, -99999
+
+ # Append to arrays
+ allData.append(this_var)
+ allDataQC.append(qcData)
+
+ nc_fid.close() # done with the file, so close it before going to next file in loop
+
+ # We're now all done looping over the individual files
+
+ # Get the indices with acceptable QC
+ allQC = np.concatenate(allDataQC) # Put list of numpy arrays into a single long 1-D numpy array. All QC data.
+ idx = np.where(allQC==0) # returns indices
+
+ # Now get all the forecast/observed brightness temperature data with acceptable QC
+ this_var = np.concatenate(allData)[idx] # Put list of numpy arrays into a single long 1-D numpy array. This is all the forecast/obs data with good QC
+ numObs = this_var.shape[0] # number of points with good QC for this channel
+ print('Number of obs :',numObs)
+
+ # Assume all the points actually fit into a square grid. Get the side of the square (use ceil to round up)
+ if numObs > 0:
+ l = np.ceil(np.sqrt(numObs)).astype('int') # Length of the side of the square
+
+ # Make an array that can be reshaped into the square
+ raw_data1D = np.full(l*l,np.nan) # Initialize 1D array of length l**2 to np.nan
+ raw_data1D[0:numObs] = this_var[:] # Fill data to the extent possible.
There will be some np.nan values at the end + raw_data = np.reshape(raw_data1D,(l,l)) # Reshape into "square grid" + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + met_data=raw_data.astype(float) # Give MET this info + + # Now need to tell MET the "grid" for the data + # Make a fake lat/lon grid going from 0.0 to 50.0 degrees, with the interval determined by number of points + griddedDatasets[source]['latDef'][0] = 0.0 # starting point + griddedDatasets[source]['latDef'][1] = np.diff(np.linspace(0,50,l)).round(6)[0] # interval (degrees) + griddedDatasets[source]['latDef'][2] = int(l) # number of points + griddedDatasets[source]['lonDef'][0:3] = griddedDatasets[source]['latDef'] + + gridInfo = getGridInfo(source, griddedDatasets[source]['gridType']) # 'LatLon' gridType + return met_data, gridInfo + + else: + return -99999, -99999 + +########### +def getGridInfo(source,gridType): + + if gridType == 'LatLon': + latDef = griddedDatasets[source]['latDef'] + lonDef = griddedDatasets[source]['lonDef'] + gridInfo = { + 'type': gridType, + 'name': source, + 'lat_ll': latDef[0], #-90.000, + 'lon_ll': lonDef[0], #-180.000, + 'delta_lat': latDef[1], #0.5000, + 'delta_lon': lonDef[1], #0.625, + 'Nlat': latDef[2], #361, + 'Nlon': lonDef[2], #576, + } + elif gridType == 'Gaussian': + gridInfo = { + 'type': gridType, + 'name': source, + 'nx': griddedDatasets[source]['nx'], + 'ny': griddedDatasets[source]['ny'], + 'lon_zero': griddedDatasets[source]['lon_zero'], + } + + return gridInfo + +def getAttrArray(source,variable,initTime,validTime): + + init = dt.datetime.strptime(initTime,"%Y%m%d%H") + valid = dt.datetime.strptime(validTime,"%Y%m%d%H") + lead, rem = divmod((valid-init).total_seconds(), 3600) + + attrs = { + + 'valid': valid.strftime("%Y%m%d_%H%M%S"), + 'init': init.strftime("%Y%m%d_%H%M%S"), + 'lead': str(int(lead)), + 'accum': '000000', + + 'name': variable, #'MERRA2_Cloud_Percentage' + 'long_name': variable, #'Cloud Percentage Levels', + 'level': 'ALL', + 'units': verifVariables[variable]['units'], + + 'grid': getGridInfo(source,griddedDatasets[source]['gridType']) + } + + #print(attrs) + #print(griddedDatasets[source]) + + return attrs + +######## END FUNCTIONS ########## + + +#if __name__ == "__main__": +dataFile, dataSource, variable, i_date, v_date, flag = sys.argv[1].split(":") +met_data = getDataArray(dataFile,dataSource,variable,flag) +attrs = getAttrArray(dataSource,variable,i_date,v_date) +print(attrs) diff --git a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.conf b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.conf new file mode 100644 index 0000000000..7ff401c5ff --- /dev/null +++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.conf @@ -0,0 +1,171 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.html + +# For additional information, please see the METplus Users Guide. 
+# https://metplus.readthedocs.io/en/latest/Users_Guide + +# ### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + + +PROCESS_LIST = GridStat, GridStat(nbr) + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + + +LOOP_BY = INIT +INIT_TIME_FMT = %Y%m%d%H +INIT_BEG=2022070312 +INIT_END=2022070312 +INIT_INCREMENT = 12H + +LEAD_SEQ = 36 + +LOOP_ORDER = times + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +FCST_GRID_STAT_INPUT_DIR = {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp +FCST_GRID_STAT_INPUT_TEMPLATE = {init?fmt=%Y%m%d}_gfs.t12z.pgrb2.0p25.f0{LEAD_SEQ} + +OBS_GRID_STAT_INPUT_DIR = +OBS_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +GRID_STAT_CLIMO_MEAN_INPUT_DIR = +GRID_STAT_CLIMO_MEAN_INPUT_TEMPLATE = + +GRID_STAT_CLIMO_STDEV_INPUT_DIR = +GRID_STAT_CLIMO_STDEV_INPUT_TEMPLATE = + +GRID_STAT_OUTPUT_DIR = {OUTPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp +GRID_STAT_OUTPUT_TEMPLATE = + + +### +# Field Info +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#field-info +### + +MODEL = GFS +OBTYPE = SATCORPS + +CONFIG_DIR = {PARM_BASE}/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp + +FCST_VAR1_NAME = TMP +FCST_VAR1_LEVELS = R649 +FCST_VAR1_THRESH = gt0, lt180.0, ge190.0, ge200.0, ge210.0, ge220.0, ge230.0, ge240.0, ge250.0, ge260.0, ge270.0, ge280.0 +FCST_VAR2_NAME = PRES +FCST_VAR2_LEVELS = R646 +FCST_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +FCST_VAR2_OPTIONS = convert(x)=x*0.01; +OBS_VAR1_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp/GEO-MRGD.{valid?fmt=%Y%j.%H}30.GRID.NC:{OBTYPE}:cloudTopTemp:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR1_LEVELS = +OBS_VAR1_THRESH = gt0, lt180.0, ge190.0, ge200.0, ge210.0, ge220.0, ge230.0, ge240.0, ge250.0, ge260.0, ge270.0, ge280.0 +OBS_VAR2_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp/GEO-MRGD.{valid?fmt=%Y%j.%H}30.GRID.NC:{OBTYPE}:cloudTopPres:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + + +### +# GridStat Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#gridstat +### + +#LOG_GRID_STAT_VERBOSITY = 2 + +GRID_STAT_CONFIG_FILE = {PARM_BASE}/met_config/GridStatConfig_wrapped + +GRID_STAT_REGRID_TO_GRID = FCST +GRID_STAT_REGRID_METHOD = BILIN +GRID_STAT_REGRID_WIDTH = 2 + + +GRID_STAT_DESC = + +FCST_GRID_STAT_FILE_WINDOW_BEGIN = 0 +FCST_GRID_STAT_FILE_WINDOW_END = 0 +OBS_GRID_STAT_FILE_WINDOW_BEGIN = 0 +OBS_GRID_STAT_FILE_WINDOW_END = 0 + +GRID_STAT_NEIGHBORHOOD_WIDTH = 1 +GRID_STAT_NEIGHBORHOOD_SHAPE = SQUARE + 
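+# With a width of 1 and a SQUARE shape, the neighborhood in this first GridStat pass
+# reduces to a single grid point; the [nbr] instance below enlarges it to widths 3-9.
+# The coverage threshold below is the fraction of points within each neighborhood that
+# must meet the field threshold for a neighborhood event to be counted.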
+GRID_STAT_NEIGHBORHOOD_COV_THRESH = >=0.5 + +GRID_STAT_ONCE_PER_FIELD = False + +FCST_IS_PROB = false + +FCST_GRID_STAT_PROB_THRESH = ==0.1 + +OBS_IS_PROB = false + +OBS_GRID_STAT_PROB_THRESH = ==0.1 + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudHgts + +GRID_STAT_OUTPUT_FLAG_FHO = STAT +GRID_STAT_OUTPUT_FLAG_CTC = BOTH +GRID_STAT_OUTPUT_FLAG_CTS = BOTH +GRID_STAT_OUTPUT_FLAG_CNT = STAT +GRID_STAT_OUTPUT_FLAG_SL1L2 = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = STAT + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_CLIMO = FALSE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_MASK_POLY = {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp/GPP_17km_60S_60N_mask.nc + +[nbr] + +FCST_VAR1_THRESH = gt0, lt180.0, ge190.0, ge200.0, ge210.0, ge220.0, ge230.0, ge240.0, ge250.0, ge260.0, ge270.0, ge280.0, >SFP20, >SFP30, >SFP40, >SFP50, >SFP60, >SFP70, >SFP80 +FCST_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SFP20, >SFP30, >SFP40, >SFP50, >SFP60, >SFP70, >SFP80 + + +OBS_VAR1_THRESH = gt0, lt180.0, ge190.0, ge200.0, ge210.0, ge220.0, ge230.0, ge240.0, ge250.0, ge260.0, ge270.0, ge280.0, >SOP20, >SOP30, >SOP40, >SOP50, >SOP60, >SOP70, >SOP80 +OBS_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SOP20, >SOP30, >SOP40, >SOP50, >SOP60, >SOP70, >SOP80 + + +GRID_STAT_NEIGHBORHOOD_WIDTH = 3, 5, 7, 9 +GRID_STAT_NEIGHBORHOOD_SHAPE = CIRCLE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >0.0 + +GRID_STAT_OUTPUT_FLAG_FHO = NONE +GRID_STAT_OUTPUT_FLAG_CTC = NONE +GRID_STAT_OUTPUT_FLAG_CTS = NONE +GRID_STAT_OUTPUT_FLAG_CNT = NONE +GRID_STAT_OUTPUT_FLAG_SL1L2 = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTC = STAT +GRID_STAT_OUTPUT_FLAG_NBRCTS = STAT +GRID_STAT_OUTPUT_FLAG_NBRCNT = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = NONE + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_NBRHD = TRUE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudHgts_NBR + diff --git a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp/read_input_data.py b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp/read_input_data.py new file mode 100755 index 0000000000..aa436135a6 --- /dev/null +++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp/read_input_data.py @@ -0,0 +1,911 @@ +#this code was provided by Craig Schwartz +#and is largely unaltered from its original +#function. 
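+#
+# A brief orientation on how MET ingests this script (assumed from the accompanying
+# .conf files and MET's python embedding convention): the OBS input template is set to
+# PYTHON_NUMPY and OBS_VAR<n>_NAME passes this script a single colon-separated argument,
+# e.g. with illustrative values only:
+# read_input_data.py <obsFile>:SATCORPS:cloudTopPres:<initYMDH>:<validYMDH>:2
+# The block at the bottom splits that string with sys.argv[1].split(":"), builds
+# met_data (a 2-D NumPy float array) via getDataArray() and attrs (valid/init/lead/
+# accum/name/long_name/level/units/grid) via getAttrArray(); MET then reads those two
+# global variables.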
+ +#from __future__ import print_function +import os +import sys +import numpy as np +import datetime as dt +from netCDF4 import Dataset # http://code.google.com/p/netcdf4-python/ +from scipy.interpolate import NearestNDInterpolator, LinearNDInterpolator +#### for Plotting +import matplotlib.cm as cm +import matplotlib.axes as maxes +import matplotlib.pyplot as plt +from mpl_toolkits.axes_grid1 import make_axes_locatable +#from mpl_toolkits.basemap import Basemap +import fnmatch +import pygrib +import pickle as pk +##### + +########################################### + +missing_values = -9999.0 # for MET + +# UPP top layer bounds (Pa) for cloud layers +PTOP_LOW_UPP = 64200. # low for > 64200 Pa +PTOP_MID_UPP = 35000. # mid between 35000-64200 Pa +PTOP_HIGH_UPP = 15000. # high between 15000-35000 Pa + +# Values for 4 x 4 contingency table +Na, Nb, Nc, Nd = 1, 2, 3, 4 +Ne, Nf, Ng, Nh = 5, 6, 7, 8 +Ni, Nj, Nk, Nl = 9, 10, 11, 12 +Nm, Nn, No, Np = 13, 14, 15, 16 + +# Notes: +# 1) Entry for 'point' is for point-to-point comparison and is all dummy data (except for gridType) that is overwritten by point2point +# 2) ERA5 on NCAR CISL RDA changed at some point. Old is ERA5_2017 (not used anymore), new is ERA5, which we'll use for 2020 data +griddedDatasets = { + 'MERRA2' : { 'gridType':'LatLon', 'latVar':'lat', 'latDef':[-90.0,0.50,361], 'lonVar':'lon', 'lonDef':[-180.0,0.625,576], 'flipY':True, 'ftype':'nc'}, + 'SATCORPS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[-180.0,0.3125,1152], 'flipY':False, 'ftype':'nc' }, + 'ERA5_2017': { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.7848769072,0.281016829130516,640], 'lonVar':'longitude', 'lonDef':[0.0,0.28125,1280], 'flipY':False, 'ftype':'nc' }, + 'ERA5' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'GFS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'GALWEM' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':True, 'ftype':'grib'}, + 'GALWEM17' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.921875,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'grib'}, + 'WWMCA' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'MPAS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc'}, + 'SAT_WWMCA_MEAN' : { 'gridType':'LatLon', 'latVar':'lat','latDef':[-90.0,0.25,721], 'lonVar':'lon', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'point' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'nc'}, +} + #TODO:Correct one, but MET can ingest a Gaussian grid only in Grib2 format (from Randy B.) 
+ #'ERA5' : { 'gridType':'Gaussian', 'nx':1280, 'ny':640, 'lon_zero':0, 'latVar':'latitude', 'lonVar':'longitude', 'flipY':False, }, + +#GALWEM, both 17-km and 0.25-degree +lowCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':3, 'typeOfFirstFixedSurface':10, 'shortName':'lcc' } +midCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':4, 'typeOfFirstFixedSurface':10, 'shortName':'mcc' } +highCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':5, 'typeOfFirstFixedSurface':10, 'shortName':'hcc' } +totalCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':10, 'shortName':'tcc' } +cloudTopHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':12, 'typeOfFirstFixedSurface':3, 'shortName':'cdct' } +cloudBaseHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':11, 'typeOfFirstFixedSurface':2, 'shortName':'cdcb' } + +#GFS +lowCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':214, 'shortName':'tcc' } +midCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':224, 'shortName':'tcc' } +highCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':234, 'shortName':'tcc' } + +#WWMCA +totalCloudFrac_WWMCA = { 'parameterName':71, 'typeOfLevel':'entireAtmosphere', 'level':0 } + +cloudTopHeightLev1_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':1 } +cloudTopHeightLev2_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':2 } +cloudTopHeightLev3_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':3 } +cloudTopHeightLev4_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':4 } +cloudTopHeight_WWMCA = [ cloudTopHeightLev1_WWMCA, cloudTopHeightLev2_WWMCA, cloudTopHeightLev3_WWMCA, cloudTopHeightLev4_WWMCA ] + +cloudBaseHeightLev1_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':1 } +cloudBaseHeightLev2_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':2 } +cloudBaseHeightLev3_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':3 } +cloudBaseHeightLev4_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':4 } +cloudBaseHeight_WWMCA = [ cloudBaseHeightLev1_WWMCA, cloudBaseHeightLev2_WWMCA, cloudBaseHeightLev3_WWMCA, cloudBaseHeightLev4_WWMCA ] + +verifVariablesModel = { + 'binaryCloud' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'totalCloudFrac' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'lowCloudFrac' : {'GFS':[lowCloudFrac_GFS], 'GALWEM17':[lowCloudFrac_GALWEM], 'GALWEM':[lowCloudFrac_GALWEM], 'MPAS':['cldfrac_low_UM']}, + 'midCloudFrac' : {'GFS':[midCloudFrac_GFS], 'GALWEM17':[midCloudFrac_GALWEM], 'GALWEM':[midCloudFrac_GALWEM], 'MPAS':['cldfrac_mid_UM']}, + 'highCloudFrac' : {'GFS':[highCloudFrac_GFS], 'GALWEM17':[highCloudFrac_GALWEM], 'GALWEM':[highCloudFrac_GALWEM], 'MPAS':['cldfrac_high_UM']}, + 'cloudTopHeight' : {'GFS':[''] , 'GALWEM17':[cloudTopHeight_GALWEM], 'GALWEM':[cloudTopHeight_GALWEM], 'MPAS':['cldht_top_UM']}, + 'cloudBaseHeight' : {'GFS':[''] , 'GALWEM17':[cloudBaseHeight_GALWEM], 'GALWEM':[cloudBaseHeight_GALWEM], 'MPAS':['cldht_base_UM']}, +} + +cloudFracCatThresholds = '>0, <10.0, >=10.0, >=20.0, >=30.0, >=40.0, >=50.0, >=60.0, >=70.0, >=80.0, >=90.0' # MET format string +brightnessTempThresholds = '<280.0, <275.0, <273.15, <270.0, <265.0, <260.0, <255.0, <250.0, <245.0, 
<240.0, <235.0, <230.0, <225.0, <220.0, <215.0, <210.0, <=SFP1, <=SFP5, <=SFP10, <=SFP25, <=SFP50, >=SFP50, >=SFP75, >=SFP90, >=SFP95, >=SFP99' +verifVariables = { + 'binaryCloud' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['TCC'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'NA', 'thresholds':'>0.0', 'interpMethod':'nearest' }, + 'totalCloudFrac' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['tcc'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'lowCloudFrac' : { 'MERRA2':['CLDLOW'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['lcc'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'midCloudFrac' : { 'MERRA2':['CLDMID'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['MCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'highCloudFrac' : { 'MERRA2':['CLDHGH'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['HCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'cloudTopTemp' : { 'MERRA2':['CLDTMP'], 'SATCORPS':['cloud_temperature_top_level'], 'ERA5':[''] , 'units':'K', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopPres' : { 'MERRA2':['CLDPRS'], 'SATCORPS':['cloud_pressure_top_level'], 'ERA5':[''] , 'units':'hPa', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopHeight' : { 'MERRA2':[''] , 'SATCORPS':['cloud_height_top_level'], 'ERA5':[''] , 'WWMCA':cloudTopHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudBaseHeight': { 'MERRA2':[''] , 'SATCORPS':['cloud_height_base_level'], 'ERA5':['cbh'], 'WWMCA':cloudBaseHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudCeiling' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'m', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'brightnessTemp' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'K', 'thresholds':brightnessTempThresholds, 'interpMethod':'bilin'}, +} + +# Combine the two dictionaries +# Only reason verifVariablesModel exists is just for space--verifVaribles gets too long if we keep adding more datasets +for key in verifVariablesModel.keys(): + x = verifVariablesModel[key] + for key1 in x.keys(): + verifVariables[key][key1] = x[key1] + +#f = '/glade/u/home/schwartz/cloud_verification/GFS_grib_0.25deg/2018112412/gfs.0p25.2018112412.f006.grib2' +#grbs = pygrib.open(f) +#idx = pygrib.index(f,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') +#model = 'GFS' +#variable = 'totCloudCover' +#x = verifVariablesModel[variable][model] # returns a list, whose ith element is a dictionary +# e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) +#idx(parameterCategory=x[0]['parameterCategory'],parameterNumber=x[0]['parameterNumber'],typeOfFirstFixedSurface=x[0]['typeOfFirstFixedSurface']) + +# to read in an environmental variable +#x = os.getenv('a') # probably type string no matter what + +########### + +def getThreshold(variable): + x = verifVariables[variable]['thresholds'] + print(x) # needed for python 3 to read variable into csh variable + return x + +def getInterpMethod(variable): + x = verifVariables[variable]['interpMethod'].upper() + print(x) # needed for python 3 to read variable into csh variable + return x + +def getTotalCloudFrac(source,data): + if source == 'SATCORPS': + # x = data[0][0,:,:,0] * 1.0E-2 # scaling + x = 
(data[0][0,:,:,1] + data[0][0,:,:,2] + data[0][0,:,:,3])*1.0E-2 # scaling + # y = data[0] + # x = np.sum( y[:,:,:,1:4],axis=3) + elif source == 'MERRA2': +# x = ( data[0][0,:,:]+data[1][0,:,:]+data[2][0,:,:] ) *100.0 # the ith element of data is a numpy array + x = data[0][0,:,:] * 100.0 # the ith element of data is a numpy array + print(x.min(), x.max()) + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + elif source == 'SAT_WWMCA_MEAN': + x = data[0][0,:,:] # already in % + else: + x = data[0] + + # This next line is WRONG. + # Missing should be set to missing + # Then, the non-missing values are 1s and 0s + #output = np.where(x > 0.0, x, 0.0) + #output = np.where(x < 0.0, -9999.0, x) # missing. currently used for SATCORPS + + x = np.where( x < 0.0 , 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + return x + +def getBinaryCloud(source,data): + y = getTotalCloudFrac(source,data) + # keep NaNs as is, but then set everything else to either 100% or 0% + x = np.where( np.isnan(y), y, np.where(y > 0.0, 100.0, 0.0) ) + return x + +def getLayerCloudFrac(source,data,layer): + if source == 'SATCORPS': + if layer.lower().strip() == 'low' : i = 1 + if layer.lower().strip() == 'mid' : i = 2 + if layer.lower().strip() == 'high' : i = 3 + x = data[0][0,:,:,i] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 100.0 + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + else: + x = data[0] + + x = np.where( x < 0.0, 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + + return x + +def getCloudTopTemp(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopPres(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-1 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 1.0E-2 # scaling [Pa] -> [hPa] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud top height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmax(tmp,axis=0) # get maximum cloud top height across all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud top shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def getCloudBaseHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = 
data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud base height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmin(tmp,axis=0) # get lowest cloud base over all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud base shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def getCloudCeiling(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] #TBD + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] # TBD + except: x = data[0][0,:,:] + return x + +# add other functions for different variables + +########### + +def getDataArray(inputFile,source,variable,dataSource): + # 1) inputFile: File name--either observations or forecast + # 2) source: Obsevation source (e.g., MERRA, SATCORP, etc.) + # 3) variable: Variable to verify + # 4) dataSource: If 1, process forecast file. If 2 process obs file. + +# # specifying names here temporarily. file names should be passed in to python from shell script +# if source == 'merra': nc_file = '/gpfs/fs1/scratch/schwartz/MERRA/MERRA2_400.tavg1_2d_rad_Nx.20181101.nc4' +# elif source == 'satcorp': nc_file = '/glade/scratch/bjung/met/test_satcorps/GEO-MRGD.2018334.0000.GRID.NC' +# elif source == 'era5': nc_file = '/glade/scratch/bjung/met/test_era5/e5.oper.fc.sfc.instan.128_164_tcc.regn320sc.2018111606_2018120112.nc' + + source = source.upper().strip() # Force uppercase and get rid of blank spaces, for safety + + print('dataSource = ',dataSource) + + ftype = griddedDatasets[source]['ftype'].lower().strip() + + # Get file handle + if ftype == 'nc': + nc_fid = Dataset(inputFile, "r", format="NETCDF4") + #nc_fid.set_auto_scale(True) + elif ftype == 'grib': + if source == 'WWMCA': + idx = pygrib.index(inputFile,'parameterName','typeOfLevel','level') + else: + idx = pygrib.index(inputFile,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') + + # dataSource == 1 means forecast, 2 means obs +# if dataSource == 1: varsToRead = verifVariablesModel[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list +# if dataSource == 2: varsToRead = verifVariables[variable][source] # returns a list + varsToRead = verifVariables[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list + + print('Trying to read ',inputFile) + + # Get lat/lon information--currently not used + #latVar = griddedDatasets[source]['latVar'] + #lonVar = griddedDatasets[source]['lonVar'] + #lats = np.array(nc_fid.variables[latVar][:]) # extract/copy the data + #lons = np.array(nc_fid.variables[lonVar][:] ) + + #print(lats.max()) + #print(lons.max()) + + # one way to deal with scale factors + # probably using something like nc_fid.set_auto_scale(True) is better... 
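+ # A brief sketch of the set_auto_scale() alternative mentioned just above (shown only
+ # as commented guidance, not used by this script): netCDF4 can unpack scale_factor /
+ # add_offset automatically, which would make the manual lat/lon rescaling below unnecessary.
+ # nc_fid.set_auto_scale(True) # auto-unpacking is on by default in netCDF4-python
+ # lats = np.array( nc_fid.variables[griddedDatasets[source]['latVar']][:] )
+ # lons = np.array( nc_fid.variables[griddedDatasets[source]['lonVar']][:] )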
+ #latMax = lats.max() + #while latMax > 90.0: + # lons = lons * 0.1 + # lats = lats * 0.1 + # latMax = lats.max() + + # get data + data = [] + for v in varsToRead: + if ftype == 'grib': + if source == 'WWMCA': + x = idx(parameterName=v['parameterName'],typeOfLevel=v['typeOfLevel'],level=v['level'])[0] # by getting element 0, you get a pygrib message + else: + # e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) + if ( variable == 'cloudTopHeight' or variable == 'cloudBaseHeight') and source == 'GALWEM17': + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[1] # by getting element 1, you get a pygrib message + else: + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[0] # by getting element 0, you get a pygrib message + if x.shortName != v['shortName']: print('Name mismatch!') + #ADDED BY JOHN O + print(x) + print('Reading ', x.shortName, 'at level ', x.typeOfFirstFixedSurface) + read_var = x.values # same x.data()[0] + read_missing = x.missingValue + print('missing value = ',read_missing) + + # The missing value (read_missing) for GALWEM17 and GALWEM cloud base/height is 9999, which is not the best choice because + # those could be actual values. So we need to use the masked array part (below) to handle which + # values are missing. We also set read_missing to something unphysical to essentially disable it. + # Finally, if we don't change the 'missingValue' property in the GRIB2 file we are eventually outputting, + # the bitmap will get all messed up, because it will be based on 9999 instead of $missing_values + if variable == 'cloudTopHeight' or variable == 'cloudBaseHeight': + read_missing = -9999. + x['missingValue'] = read_missing + if source == 'GALWEM17': + #These are masked numpy arrays, with mask = True where there is a missing value (no cloud) + #Use np.ma.filled to create an ndarray where mask = True values are set to np.nan + read_var = np.ma.filled(read_var.astype(read_var.dtype), np.nan) + elif ftype == 'nc': + read_var = nc_fid.variables[v] # extract/copy the data + try: + read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + except: + read_missing = -9999. # set a default missing value. probably only need to do this for MPAS + + print('Reading ', v) + + this_var = np.array( read_var ) # to numpy array + #print(read_missing, np.nan) + this_var = np.where( this_var==read_missing, np.nan, this_var ) + #print(this_var.shape) + data.append(this_var) # ith element of the list is a NUMPY ARRAY for the ith variable + #print(type(this_var)) + #print(type(data)) + + # Call a function to get the variable of interest. 
+ # Add a new function for each variable + if variable == 'binaryCloud': raw_data = getBinaryCloud(source,data) + if variable == 'totalCloudFrac': raw_data = getTotalCloudFrac(source,data) + if variable == 'lowCloudFrac': raw_data = getLayerCloudFrac(source,data,'low') + if variable == 'midCloudFrac': raw_data = getLayerCloudFrac(source,data,'mid') + if variable == 'highCloudFrac': raw_data = getLayerCloudFrac(source,data,'high') + if variable == 'cloudTopTemp': raw_data = getCloudTopTemp(source,data) + if variable == 'cloudTopPres': raw_data = getCloudTopPres(source,data) + if variable == 'cloudTopHeight': raw_data = getCloudTopHeight(source,data) + if variable == 'cloudBaseHeight': raw_data = getCloudBaseHeight(source,data) + if variable == 'cloudCeiling': raw_data = getCloudCeiling(source,data) + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + # Array met_data is passed to MET + # Graphics should plot $met_data to make sure things look correct + if griddedDatasets[source]['flipY']: + print('flipping ',source,' data about y-axis') + met_data=np.flip(raw_data,axis=0).astype(float) + else: + met_data=raw_data.astype(float) + + # Make plotting optional or Just use plot_data_plane +# plt_data=np.where(met_data<0, np.nan, met_data) +# map=Basemap(projection='cyl',llcrnrlat=-90,urcrnrlat=90,llcrnrlon=-180,urcrnrlon=180,resolution='c') +# map.drawcoastlines() +# map.drawcountries() +# map.drawparallels(np.arange(-90,90,30),labels=[1,1,0,1]) +# map.drawmeridians(np.arange(0,360,60),labels=[1,1,0,1]) +# plt.contourf(lons,lats,plt_data,20,origin='upper',cmap=cm.Greens) #cm.gist_rainbow) +# title=source+"_"+variable+"_"+str(validTime) +# plt.title(title) +# plt.colorbar(orientation='horizontal') +# plt.savefig(title+".png") + + # If a forecast file, output a GRIB file with + # 1 record containing the met_data + # This is a hack, because right now, MET python embedding doesn't work with pygrib, + # so output the data to a temporary file, and then have MET read the temporary grib file. + # Starting with version 9.0 of MET, the hack isn't needed, and MET python embedding works with pygrib + outputFcstFile = False # MUST be True for MET version < 9.0. 
For MET 9.0+, optional + if dataSource == 1 and ftype == 'grib': + if outputFcstFile: + grbtmp = x + grbtmp['values']=met_data + grbout = open('temp_fcst.grb2','ab') + grbout.write(grbtmp.tostring()) + grbout.close() # Close the outfile GRIB file + print('Successfully output temp_fcst.grb2') + + # Close files + if ftype == 'grib': idx.close() # Close the input GRIB file + if ftype == 'nc': nc_fid.close() # Close the netCDF file + + return met_data + +def obsError(fcstData,obsErrorFile,validDate,dataSource): + + print('Adding noise to the cloud fraction fields') + print('Using obsErrorFile',obsErrorFile) + + # First load the obsError information + #obsErrorFile = 'ob_errors.pk' + infile = open(obsErrorFile,'rb') + binEdges, binStddev = pk.load(infile) # 'numpy.ndarray' types + infile.close() + + # Get 1d forecast data + shape = fcstData.shape + fcst = fcstData.flatten() + + # Set random number seed based on valid time and model + if dataSource.upper().strip() == 'MPAS': ii = 10 + elif dataSource.upper().strip() == 'GALWEM': ii = 20 + elif dataSource.upper().strip() == 'GFS': ii = 30 + np.random.seed(int(validDate*.1 + ii)) + + # Find which bin the data is in + for i in range(0,len(binEdges)-1): + idx = np.where( (fcst >= binEdges[i]) & (fcst < binEdges[i+1]) )[0] + n = len(idx) # number of points in the ith bin + if n > 0: # check for empty bins + randVals = np.random.normal(0,binStddev[i],n) + fcst[idx] = fcst[idx] + randVals + + # bound forecast values to between 0 and 100% + fcst = np.where( fcst < 0.0, 0.0, fcst) + fcst = np.where( fcst > 100.0, 100.0, fcst) + + # now reshape forecast data back to 2D + output = fcst.reshape(shape) + + # data will have NaNs where bad. + return output + +def getFcstCloudFrac(cfr,pmid,psfc,layerDefinitions): # cfr is cloud fraction(%), pmid is 3D pressure(Pa), psfc is surface pressure (Pa) code from UPP ./INITPOST.F + + if pmid.shape != cfr.shape: # sanity check + print('dimension mismatch bewteen cldfra and pressure') + sys.exit() + + nlocs, nlevs = pmid.shape + + if len(psfc) != nlocs: # another sanity check + print('dimension mismatch bewteen cldfra and surface pressure') + sys.exit() + + cfracl = np.zeros(nlocs) + cfracm = np.zeros(nlocs) + cfrach = np.zeros(nlocs) + + for i in range(0,nlocs): + + PTOP_HIGH = PTOP_HIGH_UPP + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc[i] + PTOP_MID = 0.45*psfc[i] + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP + PTOP_MID = PTOP_MID_UPP + + idxLow = np.where( pmid[i,:] >= PTOP_LOW)[0] # using np.where with just 1 argument returns tuple + idxMid = np.where( (pmid[i,:] < PTOP_LOW) & (pmid[i,:] >= PTOP_MID))[0] + idxHigh = np.where( (pmid[i,:] < PTOP_MID) & (pmid[i,:] >= PTOP_HIGH))[0] + + # use conditions in case all indices are missing + if (len(idxLow) >0 ): cfracl[i] = np.max( cfr[i,idxLow] ) + if (len(idxMid) >0 ): cfracm[i] = np.max( cfr[i,idxMid] ) + if (len(idxHigh) >0 ): cfrach[i] = np.max( cfr[i,idxHigh] ) + + tmp = np.vstack( (cfracl,cfracm,cfrach)) # stack the rows into one 2d array + cldfraMax = np.max(tmp,axis=0) # get maximum value across low/mid/high for each pixel (minimum overlap assumption) + + # This is the fortran code put into python format...double loop unnecessary and slow + #for i in range(0,nlocs): + # for k in range(0,nlevs): + # if pmid(i,k) >= PTOP_LOW: + # cfracl(i) = np.max( [cfracl(i),cfr(i,k)] ) # Low + # elif pmid(i,k) < PTOP_LOW and pmid(i,k) >= PTOP_MID: + # cfracm(i) = np.max( [cfracm(i),cfr(i,k)] ) # Mid + # elif pmid(i,k) < PTOP_MID 
and pmid(i,k) >= PTOP_HIGH: # High + # cfrach(i) = np.max( [cfrach(i),cfr(i,k)] ) + + return cfracl, cfracm, cfrach, cldfraMax + +def getGOES16LatLon(g16_data_file): + + # Start timer + startTime = dt.datetime.utcnow() + + # designate dataset + g16nc = Dataset(g16_data_file, 'r') + + # GOES-R projection info and retrieving relevant constants + proj_info = g16nc.variables['goes_imager_projection'] + lon_origin = proj_info.longitude_of_projection_origin + H = proj_info.perspective_point_height+proj_info.semi_major_axis + r_eq = proj_info.semi_major_axis + r_pol = proj_info.semi_minor_axis + + # Data info + lat_rad_1d = g16nc.variables['x'][:] + lon_rad_1d = g16nc.variables['y'][:] + + # close file when finished + g16nc.close() + g16nc = None + + # create meshgrid filled with radian angles + lat_rad,lon_rad = np.meshgrid(lat_rad_1d,lon_rad_1d) + + # lat/lon calc routine from satellite radian angle vectors + + lambda_0 = (lon_origin*np.pi)/180.0 + + a_var = np.power(np.sin(lat_rad),2.0) + (np.power(np.cos(lat_rad),2.0)*(np.power(np.cos(lon_rad),2.0)+(((r_eq*r_eq)/(r_pol*r_pol))*np.power(np.sin(lon_rad),2.0)))) + b_var = -2.0*H*np.cos(lat_rad)*np.cos(lon_rad) + c_var = (H**2.0)-(r_eq**2.0) + + r_s = (-1.0*b_var - np.sqrt((b_var**2)-(4.0*a_var*c_var)))/(2.0*a_var) + + s_x = r_s*np.cos(lat_rad)*np.cos(lon_rad) + s_y = - r_s*np.sin(lat_rad) + s_z = r_s*np.cos(lat_rad)*np.sin(lon_rad) + + lat = (180.0/np.pi)*(np.arctan(((r_eq*r_eq)/(r_pol*r_pol))*((s_z/np.sqrt(((H-s_x)*(H-s_x))+(s_y*s_y)))))) + lon = (lambda_0 - np.arctan(s_y/(H-s_x)))*(180.0/np.pi) + + # End timer + endTime = dt.datetime.utcnow() + time = (endTime - startTime).microseconds / (1000.0*1000.0) + print('took %f4.1 seconds to get GOES16 lat/lon'%(time)) + + return lon,lat # lat/lon are 2-d arrays + +# -- +def getGOESRetrivalData(goesFile,goesVar): + + if not os.path.exists(goesFile): + print(goesFile+' not there. exit') + sys.exit() + + # First get GOES lat/lon + goesLon2d, goesLat2d = getGOES16LatLon(goesFile) # 2-d arrays + goesLon = goesLon2d.flatten() # 1-d arrays + goesLat = goesLat2d.flatten() + + # Now open the file and get the data we want + nc_goes = Dataset(goesFile, "r", format="NETCDF4") + + # If the next line is true (it should be), this indicates the variable needs to be treated + # as an "unsigned 16-bit integer". This is a pain. So we must use the "astype" method + # to change the variable type BEFORE applying scale_factor and add_offset. 
After the conversion + # we then can manually apply the scale factor and offset + #goesVar = 'PRES' + goesVar = goesVar.strip() # for safety + if nc_goes.variables[goesVar]._Unsigned.lower().strip() == 'true': + nc_goes.set_auto_scale(False) # Don't automatically apply scale_factor and add_offset to variable + goesData2d = np.array( nc_goes.variables[goesVar]).astype(np.uint16) + goesData2d = goesData2d * nc_goes.variables[goesVar].scale_factor + nc_goes.variables[goesVar].add_offset + goesQC2d = np.array( nc_goes.variables['DQF']).astype(np.uint8) + else: + goesData2d = np.array( nc_goes.variables[goesVar]) + goesQC2d = np.array( nc_goes.variables['DQF']) + + # Make variables 1-d + goesQC = goesQC2d.flatten() + goesData = goesData2d.flatten() + nc_goes.close() + + # Get rid of NaNs; base it on longitude + goesData = goesData[~np.isnan(goesLon)] # Handle data arrays first before changing lat/lon itself + goesQC = goesQC[~np.isnan(goesLon)] + goesLon = goesLon[~np.isnan(goesLon)] # ~ is "logical not", also np.logical_not + goesLat = goesLat[~np.isnan(goesLat)] + if goesLon.shape != goesLat.shape: + print('GOES lat/lon shape mismatch') + sys.exit() + + # If goesQC == 0, good QC and there was a cloud with a valid pressure. + # If goesQC == 4, no cloud; probably clear sky. + # All other QC means no data, and we want to remove those points + idx = np.logical_or( goesQC == 0, goesQC == 4) # Only keep QC == 0 or 4 + goesData = goesData[idx] + goesQC = goesQC[idx] + goesLon = goesLon[idx] + goesLat = goesLat[idx] + + # Only QC with 0 or 4 are left; now set QC == 4 to missing to indicate clear sky + goesData = np.where( goesQC != 0, missing_values, goesData) + + # Get longitude to between (0,360) for consistency with JEDI files (this check is applied to JEDI files, too) + goesLon = np.where( goesLon < 0, goesLon + 360.0, goesLon ) + + print('Min GOES Lon = ',np.min(goesLon)) + print('Max GOES Lon = ',np.max(goesLon)) + + return goesLon, goesLat, goesData + +def point2point(source,inputDir,satellite,channel,goesFile,condition,layerDefinitions,dataSource): + + # Static Variables for QC and obs + qcVar = 'brightness_temperature_'+str(channel)+'@EffectiveQC' #'@EffectiveQC0' # QC variable + obsVar = 'brightness_temperature_'+str(channel)+'@ObsValue' # Observation variable + + # Get GOES-16 retrieval file with auxiliary information + if 'abi' in satellite or 'ahi' in satellite: + goesLon, goesLat, goesData = getGOESRetrivalData(goesFile,'PRES') # return 1-d arrays + lonlatGOES = np.array( list(zip(goesLon, goesLat))) # lon/lat pairs for each GOES ob (nobs_GOES, 2) + #print('shape lonlatGOES = ',lonlatGOES.shape) + print('getting data from ',goesFile) + myGOESInterpolator = NearestNDInterpolator(lonlatGOES,goesData) + + # First check to see if there's a concatenated file with all obs. + # If so, use that. If not, have to process one file per processor, which takes a lot more time + if os.path.exists(inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'): + inputFiles = [inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'] # needs to be in a list since we loop over inputFiles + else: + # Get list of OMB files to process. There is one file per processor. + # Need to get them in order so they are called in the same order for the + # forecast and observed passes through this subroutine. 
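+      # (For example, a run that writes one file per MPI rank might leave
+      #  obsout_omb_<satellite>_0000.nc4, obsout_omb_<satellite>_0001.nc4, ...;
+      #  the exact names are illustrative -- only the obsout*_<satellite>*nc4
+      #  pattern matched below matters, and sorting keeps the forecast and
+      #  observation passes aligned file-for-file.)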
+ files = os.listdir(inputDir) + inputFiles = fnmatch.filter(files,'obsout*_'+satellite+'*nc4') # returns relative path names + inputFiles = [inputDir+'/'+s for s in inputFiles] # add on directory name + inputFiles.sort() # Get in order from low to high + if len(inputFiles) == 0: return -99999, -99999 # if no matching files, force a failure + + # Variable to pull for brightness temperature +# if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@GsiHofXBc' # Forecast variable + if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@hofx' #'@depbg' # OMB + if dataSource == 2: v = obsVar + + # Read the files and put data in array + allData, allDataQC = [], [] + for inputFile in inputFiles: + nc_fid = Dataset(inputFile, "r", format="NETCDF4") #Dataset is the class behavior to open the file + print('Trying to read ',v,' from ',inputFile) + + # Read forecast/obs data + read_var = nc_fid.variables[v] # extract/copy the data + # read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + this_var = np.array( read_var ) # to numpy array + # this_var = np.where( this_var==read_missing, np.nan, this_var ) + + #if dataSource == 1: # If true, we just read in OMB data, but we want B + # obsData = np.array( nc_fid.variables[obsVar]) + # this_var = obsData - this_var # get background/forecast value (O - OMB = B) + + #Read QC data + qcData = np.array(nc_fid.variables[qcVar]) + + # Sanity check...shapes should match + if qcData.shape != this_var.shape: return -99999, -99999 + + if 'abi' in satellite or 'ahi' in satellite: + + # Get the GOES-16 retrieval data at the observation locations in this file + # GOES values < 0 mean clear sky + lats = np.array(nc_fid.variables['latitude@MetaData']) + lons = np.array(nc_fid.variables['longitude@MetaData']) + + # Get longitude to between (0,360) for consistency with GOES-16 files + lons = np.where( lons < 0, lons + 360.0, lons ) + + lonlat = np.array( list(zip(lons,lats))) # lon/lat pairs for each ob (nobs, 2) + thisGOESData = myGOESInterpolator(lonlat) # GOES data at obs locations in this file. If pressure, units are hPa + thisGOESData = thisGOESData * 100.0 # get into Pa + + #obsCldfra = np.array( nc_fid.variables['cloud_area_fraction@MetaData'] )*100.0 # Get into %...observed cloud fraction (AHI/ABI only) + + geoValsFile = inputFile.replace('obsout','geoval') + if not os.path.exists(geoValsFile): + print(geoValsFile+' not there. exit') + sys.exit() + + nc_fid2 = Dataset(geoValsFile, "r", format="NETCDF4") + fcstCldfra = np.array( nc_fid2.variables['cloud_area_fraction_in_atmosphere_layer'])*100.0 # Get into % + pressure = np.array( nc_fid2.variables['air_pressure']) # Pa + pressure_edges = np.array( nc_fid2.variables['air_pressure_levels']) # Pa + psfc = pressure_edges[:,-1] # Surface pressure (Pa)...array order is top down + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc # these are arrays + PTOP_MID = 0.45*psfc + PTOP_HIGH = PTOP_HIGH_UPP * np.ones_like(psfc) + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP # these are constants + PTOP_MID = PTOP_MID_UPP + PTOP_HIGH = PTOP_HIGH_UPP + else: + print('layerDefinitions = ',layerDefinitions,'is invalid. exit') + sys.exit() + fcstLow,fcstMid,fcstHigh,fcstTotCldFra = getFcstCloudFrac(fcstCldfra,pressure,psfc,layerDefinitions) # get low/mid/high/total forecast cloud fractions for each ob + nc_fid2.close() + + # Modify QC data based on correspondence between forecast and obs. 
qcData used to select good data later + # It's possible that there are multiple forecast layers, such that fcstLow,fcstMid,fcstHigh are all > $cldfraThresh + # However, GOES-16 CTP doesn't really account for layering. So, we need to remove layered clouds from the forecast, + # focusing only on the layers that we asked for when doing {low,mid,high}Only conditions + # The "|" is symbol for "np.logcal_or" + yes = 2.0 + no = 0.0 + cldfraThresh = 20.0 # percent + if qcData.shape == fcstTotCldFra.shape == thisGOESData.shape: # these should all match + print('Using condition ',condition,'for ABI/AHI') + + # Note that "&" is "np.logical_and" for boolean (true/false) quantities. + # Thus, each condition should be enclosed in parentheses + if condition.lower().strip() == 'clearOnly'.lower(): # clear in both forecast and obs + qcData = np.where( (fcstTotCldFra < cldfraThresh) & (thisGOESData <= 0.0), qcData, missing_values) + elif condition.lower().strip() == 'cloudyOnly'.lower(): # cloudy in both forecast and obs + qcData = np.where( (fcstTotCldFra >= cldfraThresh) & (thisGOESData > 0.0), qcData, missing_values) + elif condition.lower().strip() == 'lowOnly'.lower(): # low clouds in both forecast and obs + fcstLow = np.where( (fcstMid >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstLow) # remove mid, high + qcData = np.where( (fcstLow >= cldfraThresh) & ( thisGOESData >= PTOP_LOW), qcData, missing_values) + elif condition.lower().strip() == 'midOnly'.lower(): # mid clouds in both forecast and obs + fcstMid = np.where( (fcstLow >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstMid) # remove low, high + qcData = np.where( (fcstMid >= cldfraThresh) & (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), qcData, missing_values) + elif condition.lower().strip() == 'highOnly'.lower(): # high clouds in both forecast and obs + fcstHigh = np.where( (fcstLow >= cldfraThresh) | ( fcstMid >= cldfraThresh), missing_values, fcstHigh) # remove mid, high + qcData = np.where( (fcstHigh >= cldfraThresh) & (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), qcData, missing_values) + elif condition.lower().strip() == 'fcstLow'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstLow >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'fcstMid'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstMid >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'fcstHigh'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstHigh >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'cloudEventLow'.lower(): + if dataSource == 1: this_var = np.where( fcstLow >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( thisGOESData >= PTOP_LOW, yes, no ) + elif condition.lower().strip() == 'cloudEventMid'.lower(): + if dataSource == 1: this_var = np.where( fcstMid >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), yes, no ) + elif condition.lower().strip() == 'cloudEventHigh'.lower(): + if dataSource == 1: this_var = np.where( fcstHigh >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: 
this_var = np.where( (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), yes, no ) + elif condition.lower().strip() == 'cloudEventTot'.lower(): + if dataSource == 1: this_var = np.where( fcstTotCldFra >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( thisGOESData > 0.0, yes, no ) + elif condition.lower().strip() == 'all': + print("not doing any conditional verification or stratifying by event") + else: + print("condition = ",condition," not recognized.") + sys.exit() + #elif condition.lower().strip() == '4x4table'.lower(): + #if dataSource == 1: + # this_var = np.where( fcstLow >= cldfraThresh, yesLow, no ) + # this_var = this_var + np.where( fcstMid >= cldfraThresh, yesMid, no ) + # this_var = this_var + np.where( fcstHigh >= cldfraThresh, yesHigh, no ) + print('number removed = ', (qcData==missing_values).sum()) + #print('number passed = ', qcData.shape[0] - (qcData==missing_values).sum()) + else: + print('shape mismatch') + return -99999, -99999 + + # Append to arrays + allData.append(this_var) + allDataQC.append(qcData) + + nc_fid.close() # done with the file, so close it before going to next file in loop + + # We're now all done looping over the individul files + + # Get the indices with acceptable QC + allQC = np.concatenate(allDataQC) # Put list of numpy arrays into a single long 1-D numpy array. All QC data. + idx = np.where(allQC==0) # returns indices + + # Now get all the forecast/observed brightness temperature data with acceptable QC + this_var = np.concatenate(allData)[idx] # Put list of numpy arrays into a single long 1-D numpy array. This is all the forecast/obs data with good QC + numObs = this_var.shape[0] # number of points with good QC for this channel + print('Number of obs :',numObs) + + # Assume all the points actually fit into a square grid. Get the side of the square (use ceil to round up) + if numObs > 0: + l = np.ceil(np.sqrt(numObs)).astype('int') # Length of the side of the square + + # Make an array that can be reshaped into the square + raw_data1D = np.full(l*l,np.nan) # Initialize 1D array of length l**2 to np.nan + raw_data1D[0:numObs] = this_var[:] # Fill data to the extent possible. 
There will be some np.nan values at the end + raw_data = np.reshape(raw_data1D,(l,l)) # Reshape into "square grid" + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + met_data=raw_data.astype(float) # Give MET this info + + # Now need to tell MET the "grid" for the data + # Make a fake lat/lon grid going from 0.0 to 50.0 degrees, with the interval determined by number of points + griddedDatasets[source]['latDef'][0] = 0.0 # starting point + griddedDatasets[source]['latDef'][1] = np.diff(np.linspace(0,50,l)).round(6)[0] # interval (degrees) + griddedDatasets[source]['latDef'][2] = int(l) # number of points + griddedDatasets[source]['lonDef'][0:3] = griddedDatasets[source]['latDef'] + + gridInfo = getGridInfo(source, griddedDatasets[source]['gridType']) # 'LatLon' gridType + return met_data, gridInfo + + else: + return -99999, -99999 + +########### +def getGridInfo(source,gridType): + + if gridType == 'LatLon': + latDef = griddedDatasets[source]['latDef'] + lonDef = griddedDatasets[source]['lonDef'] + gridInfo = { + 'type': gridType, + 'name': source, + 'lat_ll': latDef[0], #-90.000, + 'lon_ll': lonDef[0], #-180.000, + 'delta_lat': latDef[1], #0.5000, + 'delta_lon': lonDef[1], #0.625, + 'Nlat': latDef[2], #361, + 'Nlon': lonDef[2], #576, + } + elif gridType == 'Gaussian': + gridInfo = { + 'type': gridType, + 'name': source, + 'nx': griddedDatasets[source]['nx'], + 'ny': griddedDatasets[source]['ny'], + 'lon_zero': griddedDatasets[source]['lon_zero'], + } + + return gridInfo + +def getAttrArray(source,variable,initTime,validTime): + + init = dt.datetime.strptime(initTime,"%Y%m%d%H") + valid = dt.datetime.strptime(validTime,"%Y%m%d%H") + lead, rem = divmod((valid-init).total_seconds(), 3600) + + attrs = { + + 'valid': valid.strftime("%Y%m%d_%H%M%S"), + 'init': init.strftime("%Y%m%d_%H%M%S"), + 'lead': str(int(lead)), + 'accum': '000000', + + 'name': variable, #'MERRA2_Cloud_Percentage' + 'long_name': variable, #'Cloud Percentage Levels', + 'level': 'ALL', + 'units': verifVariables[variable]['units'], + + 'grid': getGridInfo(source,griddedDatasets[source]['gridType']) + } + + #print(attrs) + #print(griddedDatasets[source]) + + return attrs + +######## END FUNCTIONS ########## + + +#if __name__ == "__main__": +dataFile, dataSource, variable, i_date, v_date, flag = sys.argv[1].split(":") +met_data = getDataArray(dataFile,dataSource,variable,flag) +attrs = getAttrArray(dataSource,variable,i_date,v_date) +print(attrs) diff --git a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf new file mode 100644 index 0000000000..df54f704af --- /dev/null +++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf @@ -0,0 +1,202 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.html + +# For additional information, please see the METplus Users Guide. 
+# https://metplus.readthedocs.io/en/latest/Users_Guide + +# ### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + + +PROCESS_LIST = GridStat, GridStat(nbr), GridStat(prob) + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +LOOP_BY = INIT +INIT_TIME_FMT = %Y%m%d%H +INIT_BEG=2020072300 +INIT_END=2020072300 +INIT_INCREMENT = 12H + +LEAD_SEQ = 36 + +LOOP_ORDER = times + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +FCST_GRID_STAT_INPUT_DIR = +FCST_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +OBS_GRID_STAT_INPUT_DIR = +OBS_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +GRID_STAT_CLIMO_MEAN_INPUT_DIR = +GRID_STAT_CLIMO_MEAN_INPUT_TEMPLATE = + +GRID_STAT_CLIMO_STDEV_INPUT_DIR = +GRID_STAT_CLIMO_STDEV_INPUT_TEMPLATE = + +GRID_STAT_OUTPUT_DIR = {OUTPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac +GRID_STAT_OUTPUT_TEMPLATE = + + +### +# Field Info +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#field-info +### + +MODEL = MPAS +OBTYPE = MERRA2 + +CONFIG_DIR = {PARM_BASE}/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac + +FCST_VAR1_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/diag.{valid?fmt=%Y-%m-%d_%H}.00.00_latlon.nc:{MODEL}:totalCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:1 +FCST_VAR1_LEVELS = +FCST_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +FCST_VAR2_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/diag.{valid?fmt=%Y-%m-%d_%H}.00.00_latlon.nc:{MODEL}:lowCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:1 +FCST_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + +OBS_VAR1_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/MERRA2_400.tavg1_2d_rad_Nx.{valid?fmt=%Y%m%d}.nc4:{OBTYPE}:totalCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR1_LEVELS = +OBS_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +OBS_VAR2_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/MERRA2_400.tavg1_2d_rad_Nx.{valid?fmt=%Y%m%d}.nc4:{OBTYPE}:lowCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + + +### +# GridStat Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#gridstat +### + +#LOG_GRID_STAT_VERBOSITY = 2 + +GRID_STAT_CONFIG_FILE = {PARM_BASE}/met_config/GridStatConfig_wrapped + +GRID_STAT_REGRID_TO_GRID = FCST +GRID_STAT_REGRID_METHOD = BILIN +GRID_STAT_REGRID_WIDTH = 2 + + 
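+# Note on the PYTHON_NUMPY fields above: each *_VAR<n>_NAME hands read_input_data.py a
+# single colon-separated argument string that the script splits into
+#   dataFile : dataSource : variable : initTime : validTime : flag
+# where dataSource is the dataset name (MPAS or MERRA2 here) and flag=1 processes the
+# file as the forecast while flag=2 processes it as the observation. For this case the
+# forecast argument resolves to something like (values shown are illustrative):
+#   diag.2020-07-24_12.00.00_latlon.nc:MPAS:totalCloudFrac:2020072300:2020072412:1
+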
+GRID_STAT_DESC = + +FCST_GRID_STAT_FILE_WINDOW_BEGIN = 0 +FCST_GRID_STAT_FILE_WINDOW_END = 0 +OBS_GRID_STAT_FILE_WINDOW_BEGIN = 0 +OBS_GRID_STAT_FILE_WINDOW_END = 0 + +GRID_STAT_NEIGHBORHOOD_WIDTH = 1 +GRID_STAT_NEIGHBORHOOD_SHAPE = SQUARE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >=0.5 + +GRID_STAT_ONCE_PER_FIELD = False + +FCST_IS_PROB = false + +FCST_GRID_STAT_PROB_THRESH = ==0.1 + +OBS_IS_PROB = false + +OBS_GRID_STAT_PROB_THRESH = ==0.1 + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudFracs + +GRID_STAT_OUTPUT_FLAG_FHO = STAT +GRID_STAT_OUTPUT_FLAG_CTC = STAT +GRID_STAT_OUTPUT_FLAG_CTS = STAT +GRID_STAT_OUTPUT_FLAG_CNT = STAT +GRID_STAT_OUTPUT_FLAG_SL1L2 = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = STAT + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_CLIMO = FALSE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + + +GRID_STAT_MASK_POLY = {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/GPP_17km_60S_60N_mask.nc + +[nbr] + +FCST_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SFP20, >SFP30, >SFP40, >SFP50, >SFP60, >SFP70, >SFP80 + +OBS_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SOP20, >SOP30, >SOP40, >SOP50, >SOP60, >SOP70, >SOP80 + +GRID_STAT_NEIGHBORHOOD_WIDTH = 3, 5, 7, 9 +GRID_STAT_NEIGHBORHOOD_SHAPE = CIRCLE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >0.0 + +GRID_STAT_OUTPUT_FLAG_FHO = NONE +GRID_STAT_OUTPUT_FLAG_CTC = NONE +GRID_STAT_OUTPUT_FLAG_CTS = NONE +GRID_STAT_OUTPUT_FLAG_CNT = NONE +GRID_STAT_OUTPUT_FLAG_SL1L2 = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTC = STAT +GRID_STAT_OUTPUT_FLAG_NBRCTS = STAT +GRID_STAT_OUTPUT_FLAG_NBRCNT = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = NONE + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_NBRHD = TRUE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudFracs_NBR + +[prob] + +OBS_VAR1_THRESH = gt0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +OBS_VAR2_THRESH = gt0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +FCST_IS_PROB = TRUE + +FCST_VAR1_THRESH = >0.1, >0.2, >0.3, >0.4, >0.5, >0.6, >0.7, >0.8, >0.9, >1.0 +FCST_VAR2_THRESH = >0.1, >0.2, >0.3, >0.4, >0.5, >0.6, >0.7, >0.8, >0.9, >1.0 +GRID_STAT_NEIGHBORHOOD_WIDTH = 3, 5, 7, 9 +GRID_STAT_NEIGHBORHOOD_SHAPE = CIRCLE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >0.0 + +GRID_STAT_OUTPUT_FLAG_FHO = NONE +GRID_STAT_OUTPUT_FLAG_CTC = NONE +GRID_STAT_OUTPUT_FLAG_CTS = NONE +GRID_STAT_OUTPUT_FLAG_CNT = NONE +GRID_STAT_OUTPUT_FLAG_SL1L2 = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTC = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTS = NONE +GRID_STAT_OUTPUT_FLAG_NBRCNT = NONE +GRID_STAT_OUTPUT_FLAG_GRAD = NONE +GRID_STAT_OUTPUT_FLAG_PCT = STAT +GRID_STAT_OUTPUT_FLAG_PSTD = STAT +GRID_STAT_OUTPUT_FLAG_PJC = STAT +GRID_STAT_OUTPUT_FLAG_PRC = STAT + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_NBRHD = FALSE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudFracs_PROB + diff --git 
a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py new file mode 100755 index 0000000000..aa436135a6 --- /dev/null +++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac/read_input_data.py @@ -0,0 +1,911 @@ +#this code was provided by Craig Schwartz +#and is largely unaltered from its original +#function. + +#from __future__ import print_function +import os +import sys +import numpy as np +import datetime as dt +from netCDF4 import Dataset # http://code.google.com/p/netcdf4-python/ +from scipy.interpolate import NearestNDInterpolator, LinearNDInterpolator +#### for Plotting +import matplotlib.cm as cm +import matplotlib.axes as maxes +import matplotlib.pyplot as plt +from mpl_toolkits.axes_grid1 import make_axes_locatable +#from mpl_toolkits.basemap import Basemap +import fnmatch +import pygrib +import pickle as pk +##### + +########################################### + +missing_values = -9999.0 # for MET + +# UPP top layer bounds (Pa) for cloud layers +PTOP_LOW_UPP = 64200. # low for > 64200 Pa +PTOP_MID_UPP = 35000. # mid between 35000-64200 Pa +PTOP_HIGH_UPP = 15000. # high between 15000-35000 Pa + +# Values for 4 x 4 contingency table +Na, Nb, Nc, Nd = 1, 2, 3, 4 +Ne, Nf, Ng, Nh = 5, 6, 7, 8 +Ni, Nj, Nk, Nl = 9, 10, 11, 12 +Nm, Nn, No, Np = 13, 14, 15, 16 + +# Notes: +# 1) Entry for 'point' is for point-to-point comparison and is all dummy data (except for gridType) that is overwritten by point2point +# 2) ERA5 on NCAR CISL RDA changed at some point. Old is ERA5_2017 (not used anymore), new is ERA5, which we'll use for 2020 data +griddedDatasets = { + 'MERRA2' : { 'gridType':'LatLon', 'latVar':'lat', 'latDef':[-90.0,0.50,361], 'lonVar':'lon', 'lonDef':[-180.0,0.625,576], 'flipY':True, 'ftype':'nc'}, + 'SATCORPS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[-180.0,0.3125,1152], 'flipY':False, 'ftype':'nc' }, + 'ERA5_2017': { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.7848769072,0.281016829130516,640], 'lonVar':'longitude', 'lonDef':[0.0,0.28125,1280], 'flipY':False, 'ftype':'nc' }, + 'ERA5' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'GFS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'GALWEM' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':True, 'ftype':'grib'}, + 'GALWEM17' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.921875,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'grib'}, + 'WWMCA' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'MPAS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc'}, + 'SAT_WWMCA_MEAN' : { 'gridType':'LatLon', 'latVar':'lat','latDef':[-90.0,0.25,721], 'lonVar':'lon', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'point' : { 'gridType':'LatLon', 
'latVar':'latitude','latDef':[-90.0,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'nc'}, +} + #TODO:Correct one, but MET can ingest a Gaussian grid only in Grib2 format (from Randy B.) + #'ERA5' : { 'gridType':'Gaussian', 'nx':1280, 'ny':640, 'lon_zero':0, 'latVar':'latitude', 'lonVar':'longitude', 'flipY':False, }, + +#GALWEM, both 17-km and 0.25-degree +lowCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':3, 'typeOfFirstFixedSurface':10, 'shortName':'lcc' } +midCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':4, 'typeOfFirstFixedSurface':10, 'shortName':'mcc' } +highCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':5, 'typeOfFirstFixedSurface':10, 'shortName':'hcc' } +totalCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':10, 'shortName':'tcc' } +cloudTopHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':12, 'typeOfFirstFixedSurface':3, 'shortName':'cdct' } +cloudBaseHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':11, 'typeOfFirstFixedSurface':2, 'shortName':'cdcb' } + +#GFS +lowCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':214, 'shortName':'tcc' } +midCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':224, 'shortName':'tcc' } +highCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':234, 'shortName':'tcc' } + +#WWMCA +totalCloudFrac_WWMCA = { 'parameterName':71, 'typeOfLevel':'entireAtmosphere', 'level':0 } + +cloudTopHeightLev1_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':1 } +cloudTopHeightLev2_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':2 } +cloudTopHeightLev3_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':3 } +cloudTopHeightLev4_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':4 } +cloudTopHeight_WWMCA = [ cloudTopHeightLev1_WWMCA, cloudTopHeightLev2_WWMCA, cloudTopHeightLev3_WWMCA, cloudTopHeightLev4_WWMCA ] + +cloudBaseHeightLev1_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':1 } +cloudBaseHeightLev2_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':2 } +cloudBaseHeightLev3_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':3 } +cloudBaseHeightLev4_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':4 } +cloudBaseHeight_WWMCA = [ cloudBaseHeightLev1_WWMCA, cloudBaseHeightLev2_WWMCA, cloudBaseHeightLev3_WWMCA, cloudBaseHeightLev4_WWMCA ] + +verifVariablesModel = { + 'binaryCloud' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'totalCloudFrac' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'lowCloudFrac' : {'GFS':[lowCloudFrac_GFS], 'GALWEM17':[lowCloudFrac_GALWEM], 'GALWEM':[lowCloudFrac_GALWEM], 'MPAS':['cldfrac_low_UM']}, + 'midCloudFrac' : {'GFS':[midCloudFrac_GFS], 'GALWEM17':[midCloudFrac_GALWEM], 'GALWEM':[midCloudFrac_GALWEM], 'MPAS':['cldfrac_mid_UM']}, + 'highCloudFrac' : {'GFS':[highCloudFrac_GFS], 'GALWEM17':[highCloudFrac_GALWEM], 'GALWEM':[highCloudFrac_GALWEM], 'MPAS':['cldfrac_high_UM']}, + 'cloudTopHeight' : {'GFS':[''] , 'GALWEM17':[cloudTopHeight_GALWEM], 'GALWEM':[cloudTopHeight_GALWEM], 'MPAS':['cldht_top_UM']}, + 'cloudBaseHeight' : {'GFS':[''] , 'GALWEM17':[cloudBaseHeight_GALWEM], 'GALWEM':[cloudBaseHeight_GALWEM], 'MPAS':['cldht_base_UM']}, 
+} + +cloudFracCatThresholds = '>0, <10.0, >=10.0, >=20.0, >=30.0, >=40.0, >=50.0, >=60.0, >=70.0, >=80.0, >=90.0' # MET format string +brightnessTempThresholds = '<280.0, <275.0, <273.15, <270.0, <265.0, <260.0, <255.0, <250.0, <245.0, <240.0, <235.0, <230.0, <225.0, <220.0, <215.0, <210.0, <=SFP1, <=SFP5, <=SFP10, <=SFP25, <=SFP50, >=SFP50, >=SFP75, >=SFP90, >=SFP95, >=SFP99' +verifVariables = { + 'binaryCloud' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['TCC'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'NA', 'thresholds':'>0.0', 'interpMethod':'nearest' }, + 'totalCloudFrac' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['tcc'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'lowCloudFrac' : { 'MERRA2':['CLDLOW'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['lcc'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'midCloudFrac' : { 'MERRA2':['CLDMID'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['MCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'highCloudFrac' : { 'MERRA2':['CLDHGH'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['HCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'cloudTopTemp' : { 'MERRA2':['CLDTMP'], 'SATCORPS':['cloud_temperature_top_level'], 'ERA5':[''] , 'units':'K', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopPres' : { 'MERRA2':['CLDPRS'], 'SATCORPS':['cloud_pressure_top_level'], 'ERA5':[''] , 'units':'hPa', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopHeight' : { 'MERRA2':[''] , 'SATCORPS':['cloud_height_top_level'], 'ERA5':[''] , 'WWMCA':cloudTopHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudBaseHeight': { 'MERRA2':[''] , 'SATCORPS':['cloud_height_base_level'], 'ERA5':['cbh'], 'WWMCA':cloudBaseHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudCeiling' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'m', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'brightnessTemp' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'K', 'thresholds':brightnessTempThresholds, 'interpMethod':'bilin'}, +} + +# Combine the two dictionaries +# Only reason verifVariablesModel exists is just for space--verifVaribles gets too long if we keep adding more datasets +for key in verifVariablesModel.keys(): + x = verifVariablesModel[key] + for key1 in x.keys(): + verifVariables[key][key1] = x[key1] + +#f = '/glade/u/home/schwartz/cloud_verification/GFS_grib_0.25deg/2018112412/gfs.0p25.2018112412.f006.grib2' +#grbs = pygrib.open(f) +#idx = pygrib.index(f,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') +#model = 'GFS' +#variable = 'totCloudCover' +#x = verifVariablesModel[variable][model] # returns a list, whose ith element is a dictionary +# e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) +#idx(parameterCategory=x[0]['parameterCategory'],parameterNumber=x[0]['parameterNumber'],typeOfFirstFixedSurface=x[0]['typeOfFirstFixedSurface']) + +# to read in an environmental variable +#x = os.getenv('a') # probably type string no matter what + +########### + +def getThreshold(variable): + x = verifVariables[variable]['thresholds'] + print(x) # needed for python 3 to read variable into csh variable + return x + +def getInterpMethod(variable): + x = 
verifVariables[variable]['interpMethod'].upper() + print(x) # needed for python 3 to read variable into csh variable + return x + +def getTotalCloudFrac(source,data): + if source == 'SATCORPS': + # x = data[0][0,:,:,0] * 1.0E-2 # scaling + x = (data[0][0,:,:,1] + data[0][0,:,:,2] + data[0][0,:,:,3])*1.0E-2 # scaling + # y = data[0] + # x = np.sum( y[:,:,:,1:4],axis=3) + elif source == 'MERRA2': +# x = ( data[0][0,:,:]+data[1][0,:,:]+data[2][0,:,:] ) *100.0 # the ith element of data is a numpy array + x = data[0][0,:,:] * 100.0 # the ith element of data is a numpy array + print(x.min(), x.max()) + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + elif source == 'SAT_WWMCA_MEAN': + x = data[0][0,:,:] # already in % + else: + x = data[0] + + # This next line is WRONG. + # Missing should be set to missing + # Then, the non-missing values are 1s and 0s + #output = np.where(x > 0.0, x, 0.0) + #output = np.where(x < 0.0, -9999.0, x) # missing. currently used for SATCORPS + + x = np.where( x < 0.0 , 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + return x + +def getBinaryCloud(source,data): + y = getTotalCloudFrac(source,data) + # keep NaNs as is, but then set everything else to either 100% or 0% + x = np.where( np.isnan(y), y, np.where(y > 0.0, 100.0, 0.0) ) + return x + +def getLayerCloudFrac(source,data,layer): + if source == 'SATCORPS': + if layer.lower().strip() == 'low' : i = 1 + if layer.lower().strip() == 'mid' : i = 2 + if layer.lower().strip() == 'high' : i = 3 + x = data[0][0,:,:,i] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 100.0 + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + else: + x = data[0] + + x = np.where( x < 0.0, 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + + return x + +def getCloudTopTemp(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopPres(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-1 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 1.0E-2 # scaling [Pa] -> [hPa] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud top height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmax(tmp,axis=0) # get maximum cloud top height across all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud top shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def 
getCloudBaseHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud base height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmin(tmp,axis=0) # get lowest cloud base over all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud base shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def getCloudCeiling(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] #TBD + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] # TBD + except: x = data[0][0,:,:] + return x + +# add other functions for different variables + +########### + +def getDataArray(inputFile,source,variable,dataSource): + # 1) inputFile: File name--either observations or forecast + # 2) source: Obsevation source (e.g., MERRA, SATCORP, etc.) + # 3) variable: Variable to verify + # 4) dataSource: If 1, process forecast file. If 2 process obs file. + +# # specifying names here temporarily. file names should be passed in to python from shell script +# if source == 'merra': nc_file = '/gpfs/fs1/scratch/schwartz/MERRA/MERRA2_400.tavg1_2d_rad_Nx.20181101.nc4' +# elif source == 'satcorp': nc_file = '/glade/scratch/bjung/met/test_satcorps/GEO-MRGD.2018334.0000.GRID.NC' +# elif source == 'era5': nc_file = '/glade/scratch/bjung/met/test_era5/e5.oper.fc.sfc.instan.128_164_tcc.regn320sc.2018111606_2018120112.nc' + + source = source.upper().strip() # Force uppercase and get rid of blank spaces, for safety + + print('dataSource = ',dataSource) + + ftype = griddedDatasets[source]['ftype'].lower().strip() + + # Get file handle + if ftype == 'nc': + nc_fid = Dataset(inputFile, "r", format="NETCDF4") + #nc_fid.set_auto_scale(True) + elif ftype == 'grib': + if source == 'WWMCA': + idx = pygrib.index(inputFile,'parameterName','typeOfLevel','level') + else: + idx = pygrib.index(inputFile,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') + + # dataSource == 1 means forecast, 2 means obs +# if dataSource == 1: varsToRead = verifVariablesModel[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list +# if dataSource == 2: varsToRead = verifVariables[variable][source] # returns a list + varsToRead = verifVariables[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list + + print('Trying to read ',inputFile) + + # Get lat/lon information--currently not used + #latVar = griddedDatasets[source]['latVar'] + #lonVar = griddedDatasets[source]['lonVar'] + #lats = np.array(nc_fid.variables[latVar][:]) # extract/copy the data + #lons = np.array(nc_fid.variables[lonVar][:] ) + + #print(lats.max()) + #print(lons.max()) + + # one way to deal with scale factors + # probably using something like nc_fid.set_auto_scale(True) is better... 
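+  # (Sketch of that alternative, not enabled here: calling nc_fid.set_auto_scale(True)
+  #  right after opening the Dataset lets netCDF4 apply scale_factor/add_offset itself,
+  #  which would make the manual rescaling kept below for reference unnecessary.)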
+ #latMax = lats.max() + #while latMax > 90.0: + # lons = lons * 0.1 + # lats = lats * 0.1 + # latMax = lats.max() + + # get data + data = [] + for v in varsToRead: + if ftype == 'grib': + if source == 'WWMCA': + x = idx(parameterName=v['parameterName'],typeOfLevel=v['typeOfLevel'],level=v['level'])[0] # by getting element 0, you get a pygrib message + else: + # e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) + if ( variable == 'cloudTopHeight' or variable == 'cloudBaseHeight') and source == 'GALWEM17': + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[1] # by getting element 1, you get a pygrib message + else: + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[0] # by getting element 0, you get a pygrib message + if x.shortName != v['shortName']: print('Name mismatch!') + #ADDED BY JOHN O + print(x) + print('Reading ', x.shortName, 'at level ', x.typeOfFirstFixedSurface) + read_var = x.values # same x.data()[0] + read_missing = x.missingValue + print('missing value = ',read_missing) + + # The missing value (read_missing) for GALWEM17 and GALWEM cloud base/height is 9999, which is not the best choice because + # those could be actual values. So we need to use the masked array part (below) to handle which + # values are missing. We also set read_missing to something unphysical to essentially disable it. + # Finally, if we don't change the 'missingValue' property in the GRIB2 file we are eventually outputting, + # the bitmap will get all messed up, because it will be based on 9999 instead of $missing_values + if variable == 'cloudTopHeight' or variable == 'cloudBaseHeight': + read_missing = -9999. + x['missingValue'] = read_missing + if source == 'GALWEM17': + #These are masked numpy arrays, with mask = True where there is a missing value (no cloud) + #Use np.ma.filled to create an ndarray where mask = True values are set to np.nan + read_var = np.ma.filled(read_var.astype(read_var.dtype), np.nan) + elif ftype == 'nc': + read_var = nc_fid.variables[v] # extract/copy the data + try: + read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + except: + read_missing = -9999. # set a default missing value. probably only need to do this for MPAS + + print('Reading ', v) + + this_var = np.array( read_var ) # to numpy array + #print(read_missing, np.nan) + this_var = np.where( this_var==read_missing, np.nan, this_var ) + #print(this_var.shape) + data.append(this_var) # ith element of the list is a NUMPY ARRAY for the ith variable + #print(type(this_var)) + #print(type(data)) + + # Call a function to get the variable of interest. 
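+  # (Each get* helper below returns a 2-D array in the units given in verifVariables,
+  #  with missing points carried as np.nan until the MET conversion that follows.)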
+ # Add a new function for each variable + if variable == 'binaryCloud': raw_data = getBinaryCloud(source,data) + if variable == 'totalCloudFrac': raw_data = getTotalCloudFrac(source,data) + if variable == 'lowCloudFrac': raw_data = getLayerCloudFrac(source,data,'low') + if variable == 'midCloudFrac': raw_data = getLayerCloudFrac(source,data,'mid') + if variable == 'highCloudFrac': raw_data = getLayerCloudFrac(source,data,'high') + if variable == 'cloudTopTemp': raw_data = getCloudTopTemp(source,data) + if variable == 'cloudTopPres': raw_data = getCloudTopPres(source,data) + if variable == 'cloudTopHeight': raw_data = getCloudTopHeight(source,data) + if variable == 'cloudBaseHeight': raw_data = getCloudBaseHeight(source,data) + if variable == 'cloudCeiling': raw_data = getCloudCeiling(source,data) + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + # Array met_data is passed to MET + # Graphics should plot $met_data to make sure things look correct + if griddedDatasets[source]['flipY']: + print('flipping ',source,' data about y-axis') + met_data=np.flip(raw_data,axis=0).astype(float) + else: + met_data=raw_data.astype(float) + + # Make plotting optional or Just use plot_data_plane +# plt_data=np.where(met_data<0, np.nan, met_data) +# map=Basemap(projection='cyl',llcrnrlat=-90,urcrnrlat=90,llcrnrlon=-180,urcrnrlon=180,resolution='c') +# map.drawcoastlines() +# map.drawcountries() +# map.drawparallels(np.arange(-90,90,30),labels=[1,1,0,1]) +# map.drawmeridians(np.arange(0,360,60),labels=[1,1,0,1]) +# plt.contourf(lons,lats,plt_data,20,origin='upper',cmap=cm.Greens) #cm.gist_rainbow) +# title=source+"_"+variable+"_"+str(validTime) +# plt.title(title) +# plt.colorbar(orientation='horizontal') +# plt.savefig(title+".png") + + # If a forecast file, output a GRIB file with + # 1 record containing the met_data + # This is a hack, because right now, MET python embedding doesn't work with pygrib, + # so output the data to a temporary file, and then have MET read the temporary grib file. + # Starting with version 9.0 of MET, the hack isn't needed, and MET python embedding works with pygrib + outputFcstFile = False # MUST be True for MET version < 9.0. 
For MET 9.0+, optional + if dataSource == 1 and ftype == 'grib': + if outputFcstFile: + grbtmp = x + grbtmp['values']=met_data + grbout = open('temp_fcst.grb2','ab') + grbout.write(grbtmp.tostring()) + grbout.close() # Close the outfile GRIB file + print('Successfully output temp_fcst.grb2') + + # Close files + if ftype == 'grib': idx.close() # Close the input GRIB file + if ftype == 'nc': nc_fid.close() # Close the netCDF file + + return met_data + +def obsError(fcstData,obsErrorFile,validDate,dataSource): + + print('Adding noise to the cloud fraction fields') + print('Using obsErrorFile',obsErrorFile) + + # First load the obsError information + #obsErrorFile = 'ob_errors.pk' + infile = open(obsErrorFile,'rb') + binEdges, binStddev = pk.load(infile) # 'numpy.ndarray' types + infile.close() + + # Get 1d forecast data + shape = fcstData.shape + fcst = fcstData.flatten() + + # Set random number seed based on valid time and model + if dataSource.upper().strip() == 'MPAS': ii = 10 + elif dataSource.upper().strip() == 'GALWEM': ii = 20 + elif dataSource.upper().strip() == 'GFS': ii = 30 + np.random.seed(int(validDate*.1 + ii)) + + # Find which bin the data is in + for i in range(0,len(binEdges)-1): + idx = np.where( (fcst >= binEdges[i]) & (fcst < binEdges[i+1]) )[0] + n = len(idx) # number of points in the ith bin + if n > 0: # check for empty bins + randVals = np.random.normal(0,binStddev[i],n) + fcst[idx] = fcst[idx] + randVals + + # bound forecast values to between 0 and 100% + fcst = np.where( fcst < 0.0, 0.0, fcst) + fcst = np.where( fcst > 100.0, 100.0, fcst) + + # now reshape forecast data back to 2D + output = fcst.reshape(shape) + + # data will have NaNs where bad. + return output + +def getFcstCloudFrac(cfr,pmid,psfc,layerDefinitions): # cfr is cloud fraction(%), pmid is 3D pressure(Pa), psfc is surface pressure (Pa) code from UPP ./INITPOST.F + + if pmid.shape != cfr.shape: # sanity check + print('dimension mismatch bewteen cldfra and pressure') + sys.exit() + + nlocs, nlevs = pmid.shape + + if len(psfc) != nlocs: # another sanity check + print('dimension mismatch bewteen cldfra and surface pressure') + sys.exit() + + cfracl = np.zeros(nlocs) + cfracm = np.zeros(nlocs) + cfrach = np.zeros(nlocs) + + for i in range(0,nlocs): + + PTOP_HIGH = PTOP_HIGH_UPP + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc[i] + PTOP_MID = 0.45*psfc[i] + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP + PTOP_MID = PTOP_MID_UPP + + idxLow = np.where( pmid[i,:] >= PTOP_LOW)[0] # using np.where with just 1 argument returns tuple + idxMid = np.where( (pmid[i,:] < PTOP_LOW) & (pmid[i,:] >= PTOP_MID))[0] + idxHigh = np.where( (pmid[i,:] < PTOP_MID) & (pmid[i,:] >= PTOP_HIGH))[0] + + # use conditions in case all indices are missing + if (len(idxLow) >0 ): cfracl[i] = np.max( cfr[i,idxLow] ) + if (len(idxMid) >0 ): cfracm[i] = np.max( cfr[i,idxMid] ) + if (len(idxHigh) >0 ): cfrach[i] = np.max( cfr[i,idxHigh] ) + + tmp = np.vstack( (cfracl,cfracm,cfrach)) # stack the rows into one 2d array + cldfraMax = np.max(tmp,axis=0) # get maximum value across low/mid/high for each pixel (minimum overlap assumption) + + # This is the fortran code put into python format...double loop unnecessary and slow + #for i in range(0,nlocs): + # for k in range(0,nlevs): + # if pmid(i,k) >= PTOP_LOW: + # cfracl(i) = np.max( [cfracl(i),cfr(i,k)] ) # Low + # elif pmid(i,k) < PTOP_LOW and pmid(i,k) >= PTOP_MID: + # cfracm(i) = np.max( [cfracm(i),cfr(i,k)] ) # Mid + # elif pmid(i,k) < PTOP_MID 
and pmid(i,k) >= PTOP_HIGH: # High + # cfrach(i) = np.max( [cfrach(i),cfr(i,k)] ) + + return cfracl, cfracm, cfrach, cldfraMax + +def getGOES16LatLon(g16_data_file): + + # Start timer + startTime = dt.datetime.utcnow() + + # designate dataset + g16nc = Dataset(g16_data_file, 'r') + + # GOES-R projection info and retrieving relevant constants + proj_info = g16nc.variables['goes_imager_projection'] + lon_origin = proj_info.longitude_of_projection_origin + H = proj_info.perspective_point_height+proj_info.semi_major_axis + r_eq = proj_info.semi_major_axis + r_pol = proj_info.semi_minor_axis + + # Data info + lat_rad_1d = g16nc.variables['x'][:] + lon_rad_1d = g16nc.variables['y'][:] + + # close file when finished + g16nc.close() + g16nc = None + + # create meshgrid filled with radian angles + lat_rad,lon_rad = np.meshgrid(lat_rad_1d,lon_rad_1d) + + # lat/lon calc routine from satellite radian angle vectors + + lambda_0 = (lon_origin*np.pi)/180.0 + + a_var = np.power(np.sin(lat_rad),2.0) + (np.power(np.cos(lat_rad),2.0)*(np.power(np.cos(lon_rad),2.0)+(((r_eq*r_eq)/(r_pol*r_pol))*np.power(np.sin(lon_rad),2.0)))) + b_var = -2.0*H*np.cos(lat_rad)*np.cos(lon_rad) + c_var = (H**2.0)-(r_eq**2.0) + + r_s = (-1.0*b_var - np.sqrt((b_var**2)-(4.0*a_var*c_var)))/(2.0*a_var) + + s_x = r_s*np.cos(lat_rad)*np.cos(lon_rad) + s_y = - r_s*np.sin(lat_rad) + s_z = r_s*np.cos(lat_rad)*np.sin(lon_rad) + + lat = (180.0/np.pi)*(np.arctan(((r_eq*r_eq)/(r_pol*r_pol))*((s_z/np.sqrt(((H-s_x)*(H-s_x))+(s_y*s_y)))))) + lon = (lambda_0 - np.arctan(s_y/(H-s_x)))*(180.0/np.pi) + + # End timer + endTime = dt.datetime.utcnow() + time = (endTime - startTime).microseconds / (1000.0*1000.0) + print('took %f4.1 seconds to get GOES16 lat/lon'%(time)) + + return lon,lat # lat/lon are 2-d arrays + +# -- +def getGOESRetrivalData(goesFile,goesVar): + + if not os.path.exists(goesFile): + print(goesFile+' not there. exit') + sys.exit() + + # First get GOES lat/lon + goesLon2d, goesLat2d = getGOES16LatLon(goesFile) # 2-d arrays + goesLon = goesLon2d.flatten() # 1-d arrays + goesLat = goesLat2d.flatten() + + # Now open the file and get the data we want + nc_goes = Dataset(goesFile, "r", format="NETCDF4") + + # If the next line is true (it should be), this indicates the variable needs to be treated + # as an "unsigned 16-bit integer". This is a pain. So we must use the "astype" method + # to change the variable type BEFORE applying scale_factor and add_offset. 
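+  # (i.e. roughly: raw.astype(np.uint16) * scale_factor + add_offset, using the
+  #  variable's own packing attributes.)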
After the conversion + # we then can manually apply the scale factor and offset + #goesVar = 'PRES' + goesVar = goesVar.strip() # for safety + if nc_goes.variables[goesVar]._Unsigned.lower().strip() == 'true': + nc_goes.set_auto_scale(False) # Don't automatically apply scale_factor and add_offset to variable + goesData2d = np.array( nc_goes.variables[goesVar]).astype(np.uint16) + goesData2d = goesData2d * nc_goes.variables[goesVar].scale_factor + nc_goes.variables[goesVar].add_offset + goesQC2d = np.array( nc_goes.variables['DQF']).astype(np.uint8) + else: + goesData2d = np.array( nc_goes.variables[goesVar]) + goesQC2d = np.array( nc_goes.variables['DQF']) + + # Make variables 1-d + goesQC = goesQC2d.flatten() + goesData = goesData2d.flatten() + nc_goes.close() + + # Get rid of NaNs; base it on longitude + goesData = goesData[~np.isnan(goesLon)] # Handle data arrays first before changing lat/lon itself + goesQC = goesQC[~np.isnan(goesLon)] + goesLon = goesLon[~np.isnan(goesLon)] # ~ is "logical not", also np.logical_not + goesLat = goesLat[~np.isnan(goesLat)] + if goesLon.shape != goesLat.shape: + print('GOES lat/lon shape mismatch') + sys.exit() + + # If goesQC == 0, good QC and there was a cloud with a valid pressure. + # If goesQC == 4, no cloud; probably clear sky. + # All other QC means no data, and we want to remove those points + idx = np.logical_or( goesQC == 0, goesQC == 4) # Only keep QC == 0 or 4 + goesData = goesData[idx] + goesQC = goesQC[idx] + goesLon = goesLon[idx] + goesLat = goesLat[idx] + + # Only QC with 0 or 4 are left; now set QC == 4 to missing to indicate clear sky + goesData = np.where( goesQC != 0, missing_values, goesData) + + # Get longitude to between (0,360) for consistency with JEDI files (this check is applied to JEDI files, too) + goesLon = np.where( goesLon < 0, goesLon + 360.0, goesLon ) + + print('Min GOES Lon = ',np.min(goesLon)) + print('Max GOES Lon = ',np.max(goesLon)) + + return goesLon, goesLat, goesData + +def point2point(source,inputDir,satellite,channel,goesFile,condition,layerDefinitions,dataSource): + + # Static Variables for QC and obs + qcVar = 'brightness_temperature_'+str(channel)+'@EffectiveQC' #'@EffectiveQC0' # QC variable + obsVar = 'brightness_temperature_'+str(channel)+'@ObsValue' # Observation variable + + # Get GOES-16 retrieval file with auxiliary information + if 'abi' in satellite or 'ahi' in satellite: + goesLon, goesLat, goesData = getGOESRetrivalData(goesFile,'PRES') # return 1-d arrays + lonlatGOES = np.array( list(zip(goesLon, goesLat))) # lon/lat pairs for each GOES ob (nobs_GOES, 2) + #print('shape lonlatGOES = ',lonlatGOES.shape) + print('getting data from ',goesFile) + myGOESInterpolator = NearestNDInterpolator(lonlatGOES,goesData) + + # First check to see if there's a concatenated file with all obs. + # If so, use that. If not, have to process one file per processor, which takes a lot more time + if os.path.exists(inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'): + inputFiles = [inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'] # needs to be in a list since we loop over inputFiles + else: + # Get list of OMB files to process. There is one file per processor. + # Need to get them in order so they are called in the same order for the + # forecast and observed passes through this subroutine. 
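+      # (Files are expected to match the obsout*_<satellite>*nc4 pattern, one per MPI
+      #  rank; names are illustrative. The sort below keeps the forecast and observation
+      #  passes aligned file-for-file.)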
+ files = os.listdir(inputDir) + inputFiles = fnmatch.filter(files,'obsout*_'+satellite+'*nc4') # returns relative path names + inputFiles = [inputDir+'/'+s for s in inputFiles] # add on directory name + inputFiles.sort() # Get in order from low to high + if len(inputFiles) == 0: return -99999, -99999 # if no matching files, force a failure + + # Variable to pull for brightness temperature +# if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@GsiHofXBc' # Forecast variable + if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@hofx' #'@depbg' # OMB + if dataSource == 2: v = obsVar + + # Read the files and put data in array + allData, allDataQC = [], [] + for inputFile in inputFiles: + nc_fid = Dataset(inputFile, "r", format="NETCDF4") #Dataset is the class behavior to open the file + print('Trying to read ',v,' from ',inputFile) + + # Read forecast/obs data + read_var = nc_fid.variables[v] # extract/copy the data + # read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + this_var = np.array( read_var ) # to numpy array + # this_var = np.where( this_var==read_missing, np.nan, this_var ) + + #if dataSource == 1: # If true, we just read in OMB data, but we want B + # obsData = np.array( nc_fid.variables[obsVar]) + # this_var = obsData - this_var # get background/forecast value (O - OMB = B) + + #Read QC data + qcData = np.array(nc_fid.variables[qcVar]) + + # Sanity check...shapes should match + if qcData.shape != this_var.shape: return -99999, -99999 + + if 'abi' in satellite or 'ahi' in satellite: + + # Get the GOES-16 retrieval data at the observation locations in this file + # GOES values < 0 mean clear sky + lats = np.array(nc_fid.variables['latitude@MetaData']) + lons = np.array(nc_fid.variables['longitude@MetaData']) + + # Get longitude to between (0,360) for consistency with GOES-16 files + lons = np.where( lons < 0, lons + 360.0, lons ) + + lonlat = np.array( list(zip(lons,lats))) # lon/lat pairs for each ob (nobs, 2) + thisGOESData = myGOESInterpolator(lonlat) # GOES data at obs locations in this file. If pressure, units are hPa + thisGOESData = thisGOESData * 100.0 # get into Pa + + #obsCldfra = np.array( nc_fid.variables['cloud_area_fraction@MetaData'] )*100.0 # Get into %...observed cloud fraction (AHI/ABI only) + + geoValsFile = inputFile.replace('obsout','geoval') + if not os.path.exists(geoValsFile): + print(geoValsFile+' not there. exit') + sys.exit() + + nc_fid2 = Dataset(geoValsFile, "r", format="NETCDF4") + fcstCldfra = np.array( nc_fid2.variables['cloud_area_fraction_in_atmosphere_layer'])*100.0 # Get into % + pressure = np.array( nc_fid2.variables['air_pressure']) # Pa + pressure_edges = np.array( nc_fid2.variables['air_pressure_levels']) # Pa + psfc = pressure_edges[:,-1] # Surface pressure (Pa)...array order is top down + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc # these are arrays + PTOP_MID = 0.45*psfc + PTOP_HIGH = PTOP_HIGH_UPP * np.ones_like(psfc) + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP # these are constants + PTOP_MID = PTOP_MID_UPP + PTOP_HIGH = PTOP_HIGH_UPP + else: + print('layerDefinitions = ',layerDefinitions,'is invalid. exit') + sys.exit() + fcstLow,fcstMid,fcstHigh,fcstTotCldFra = getFcstCloudFrac(fcstCldfra,pressure,psfc,layerDefinitions) # get low/mid/high/total forecast cloud fractions for each ob + nc_fid2.close() + + # Modify QC data based on correspondence between forecast and obs. 
qcData used to select good data later + # It's possible that there are multiple forecast layers, such that fcstLow,fcstMid,fcstHigh are all > $cldfraThresh + # However, GOES-16 CTP doesn't really account for layering. So, we need to remove layered clouds from the forecast, + # focusing only on the layers that we asked for when doing {low,mid,high}Only conditions + # The "|" is symbol for "np.logcal_or" + yes = 2.0 + no = 0.0 + cldfraThresh = 20.0 # percent + if qcData.shape == fcstTotCldFra.shape == thisGOESData.shape: # these should all match + print('Using condition ',condition,'for ABI/AHI') + + # Note that "&" is "np.logical_and" for boolean (true/false) quantities. + # Thus, each condition should be enclosed in parentheses + if condition.lower().strip() == 'clearOnly'.lower(): # clear in both forecast and obs + qcData = np.where( (fcstTotCldFra < cldfraThresh) & (thisGOESData <= 0.0), qcData, missing_values) + elif condition.lower().strip() == 'cloudyOnly'.lower(): # cloudy in both forecast and obs + qcData = np.where( (fcstTotCldFra >= cldfraThresh) & (thisGOESData > 0.0), qcData, missing_values) + elif condition.lower().strip() == 'lowOnly'.lower(): # low clouds in both forecast and obs + fcstLow = np.where( (fcstMid >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstLow) # remove mid, high + qcData = np.where( (fcstLow >= cldfraThresh) & ( thisGOESData >= PTOP_LOW), qcData, missing_values) + elif condition.lower().strip() == 'midOnly'.lower(): # mid clouds in both forecast and obs + fcstMid = np.where( (fcstLow >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstMid) # remove low, high + qcData = np.where( (fcstMid >= cldfraThresh) & (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), qcData, missing_values) + elif condition.lower().strip() == 'highOnly'.lower(): # high clouds in both forecast and obs + fcstHigh = np.where( (fcstLow >= cldfraThresh) | ( fcstMid >= cldfraThresh), missing_values, fcstHigh) # remove mid, high + qcData = np.where( (fcstHigh >= cldfraThresh) & (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), qcData, missing_values) + elif condition.lower().strip() == 'fcstLow'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstLow >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'fcstMid'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstMid >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'fcstHigh'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstHigh >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'cloudEventLow'.lower(): + if dataSource == 1: this_var = np.where( fcstLow >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( thisGOESData >= PTOP_LOW, yes, no ) + elif condition.lower().strip() == 'cloudEventMid'.lower(): + if dataSource == 1: this_var = np.where( fcstMid >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), yes, no ) + elif condition.lower().strip() == 'cloudEventHigh'.lower(): + if dataSource == 1: this_var = np.where( fcstHigh >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: 
this_var = np.where( (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), yes, no ) + elif condition.lower().strip() == 'cloudEventTot'.lower(): + if dataSource == 1: this_var = np.where( fcstTotCldFra >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( thisGOESData > 0.0, yes, no ) + elif condition.lower().strip() == 'all': + print("not doing any conditional verification or stratifying by event") + else: + print("condition = ",condition," not recognized.") + sys.exit() + #elif condition.lower().strip() == '4x4table'.lower(): + #if dataSource == 1: + # this_var = np.where( fcstLow >= cldfraThresh, yesLow, no ) + # this_var = this_var + np.where( fcstMid >= cldfraThresh, yesMid, no ) + # this_var = this_var + np.where( fcstHigh >= cldfraThresh, yesHigh, no ) + print('number removed = ', (qcData==missing_values).sum()) + #print('number passed = ', qcData.shape[0] - (qcData==missing_values).sum()) + else: + print('shape mismatch') + return -99999, -99999 + + # Append to arrays + allData.append(this_var) + allDataQC.append(qcData) + + nc_fid.close() # done with the file, so close it before going to next file in loop + + # We're now all done looping over the individul files + + # Get the indices with acceptable QC + allQC = np.concatenate(allDataQC) # Put list of numpy arrays into a single long 1-D numpy array. All QC data. + idx = np.where(allQC==0) # returns indices + + # Now get all the forecast/observed brightness temperature data with acceptable QC + this_var = np.concatenate(allData)[idx] # Put list of numpy arrays into a single long 1-D numpy array. This is all the forecast/obs data with good QC + numObs = this_var.shape[0] # number of points with good QC for this channel + print('Number of obs :',numObs) + + # Assume all the points actually fit into a square grid. Get the side of the square (use ceil to round up) + if numObs > 0: + l = np.ceil(np.sqrt(numObs)).astype('int') # Length of the side of the square + + # Make an array that can be reshaped into the square + raw_data1D = np.full(l*l,np.nan) # Initialize 1D array of length l**2 to np.nan + raw_data1D[0:numObs] = this_var[:] # Fill data to the extent possible. 
There will be some np.nan values at the end + raw_data = np.reshape(raw_data1D,(l,l)) # Reshape into "square grid" + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + met_data=raw_data.astype(float) # Give MET this info + + # Now need to tell MET the "grid" for the data + # Make a fake lat/lon grid going from 0.0 to 50.0 degrees, with the interval determined by number of points + griddedDatasets[source]['latDef'][0] = 0.0 # starting point + griddedDatasets[source]['latDef'][1] = np.diff(np.linspace(0,50,l)).round(6)[0] # interval (degrees) + griddedDatasets[source]['latDef'][2] = int(l) # number of points + griddedDatasets[source]['lonDef'][0:3] = griddedDatasets[source]['latDef'] + + gridInfo = getGridInfo(source, griddedDatasets[source]['gridType']) # 'LatLon' gridType + return met_data, gridInfo + + else: + return -99999, -99999 + +########### +def getGridInfo(source,gridType): + + if gridType == 'LatLon': + latDef = griddedDatasets[source]['latDef'] + lonDef = griddedDatasets[source]['lonDef'] + gridInfo = { + 'type': gridType, + 'name': source, + 'lat_ll': latDef[0], #-90.000, + 'lon_ll': lonDef[0], #-180.000, + 'delta_lat': latDef[1], #0.5000, + 'delta_lon': lonDef[1], #0.625, + 'Nlat': latDef[2], #361, + 'Nlon': lonDef[2], #576, + } + elif gridType == 'Gaussian': + gridInfo = { + 'type': gridType, + 'name': source, + 'nx': griddedDatasets[source]['nx'], + 'ny': griddedDatasets[source]['ny'], + 'lon_zero': griddedDatasets[source]['lon_zero'], + } + + return gridInfo + +def getAttrArray(source,variable,initTime,validTime): + + init = dt.datetime.strptime(initTime,"%Y%m%d%H") + valid = dt.datetime.strptime(validTime,"%Y%m%d%H") + lead, rem = divmod((valid-init).total_seconds(), 3600) + + attrs = { + + 'valid': valid.strftime("%Y%m%d_%H%M%S"), + 'init': init.strftime("%Y%m%d_%H%M%S"), + 'lead': str(int(lead)), + 'accum': '000000', + + 'name': variable, #'MERRA2_Cloud_Percentage' + 'long_name': variable, #'Cloud Percentage Levels', + 'level': 'ALL', + 'units': verifVariables[variable]['units'], + + 'grid': getGridInfo(source,griddedDatasets[source]['gridType']) + } + + #print(attrs) + #print(griddedDatasets[source]) + + return attrs + +######## END FUNCTIONS ########## + + +#if __name__ == "__main__": +dataFile, dataSource, variable, i_date, v_date, flag = sys.argv[1].split(":") +met_data = getDataArray(dataFile,dataSource,variable,flag) +attrs = getAttrArray(dataSource,variable,i_date,v_date) +print(attrs) From 0a36b91f1a6c7b18bc23caf4c34c134fd9f6f72e Mon Sep 17 00:00:00 2001 From: j-opatz Date: Fri, 2 Jun 2023 18:47:26 -0600 Subject: [PATCH 3/6] added 5th use case, docs --- .../GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py | 134 +++ internal/tests/use_cases/all_use_cases.txt | 1 + ...ridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf | 160 +++ .../read_input_data.py | 911 ++++++++++++++++++ 4 files changed, 1206 insertions(+) create mode 100644 docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py create mode 100644 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf create mode 100755 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt/read_input_data.py diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py new file mode 100644 index 
0000000000..b2f481b0d7
--- /dev/null
+++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py
@@ -0,0 +1,134 @@
+"""
+GridStat: Cloud Base Height with Neighborhood and Probabilities
+===============================================================
+
+model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf
+
+"""
+##############################################################################
+# Scientific Objective
+# --------------------
+#
+# This use case captures various statistical measures of two model comparisons
+# for cloud base height with different neighborhood settings for internal
+# model metrics and to aid in future model updates
+#
+
+##############################################################################
+# Datasets
+# --------
+#
+# | **Forecast:** Model for Prediction Across Scales (MPAS)
+# | **Observations:** ECMWF Reanalysis, Version 5 (ERA5)
+# | **Grid:** GPP 17km masking region
+#
+# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to go to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases
+# | This tarball should be unpacked into the directory that you will set as the value of INPUT_BASE. See the 'Running METplus' section for more information.
+#
+
+##############################################################################
+# METplus Components
+# ------------------
+#
+# This use case utilizes Python Embedding, which is called using the PYTHON_NUMPY keyword
+# in the forecast and observation input template settings. The same Python script processes both forecast and
+# observation datasets. The forecast field is verified against the respective observation field,
+# with the Python script being passed the input file, the model name, the variable name being analyzed,
+# the initialization and valid times, and a flag to indicate whether the field passed is an observation or a forecast.
+# This process is repeated for two GridStat instance names, each with different settings for regridding,
+# neighborhood evaluation, thresholding, output line types, and output prefix names.
+
+##############################################################################
+# METplus Workflow
+# ----------------
+#
+# GridStat is the only MET tool called in this example.
+# It processes the following run time:
+#
+# | **Init:** 2020-07-23 00Z
+# | **Forecast lead:** 36 hour
+# |
+# Because instance names are used, GridStat will run twice for this single initialization time.
+
+##############################################################################
+# METplus Configuration
+# ---------------------
+#
+# METplus first loads the default configuration file found in parm/metplus_config,
+# then it loads any configuration files passed to METplus via the command line:
+# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf
+#
+# .. highlight:: bash
+# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf
+
+##############################################################################
+# MET Configuration
+# -----------------
+#
+# METplus sets environment variables based on user settings in the METplus configuration file.
+# See :ref:`How METplus controls MET config file settings` for more details.
+#
+# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!**
+#
+# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to:
+# :ref:`Overriding Unsupported MET config file settings`
+#
+# .. note:: See the :ref:`GridStat MET Configuration` section of the User's Guide for more information on the environment variables used in the file below:
+#
+# .. highlight:: bash
+# .. literalinclude:: ../../../../parm/met_config/GridStatConfig_wrapped
+
+##############################################################################
+# Python Embedding
+# ----------------
+#
+# This use case utilizes one Python script to read and process both forecast and
+# observation fields (a short sketch of its argument handling is given after the Keywords section).
+# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt/read_input_data.py
+#
+# .. highlight:: bash
+# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt/read_input_data.py
+
+##############################################################################
+# Running METplus
+# ---------------
+#
+# Pass the use case configuration file to the run_metplus.py script
+# along with any user-specific system configuration files if desired::
+#
+#        run_metplus.py /path/to/METplus/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf /path/to/user_system.conf
+#
+# See :ref:`running-metplus` for more information.
+
+##############################################################################
+# Expected Output
+# ---------------
+#
+# A successful run will output the following both to the screen and to the logfile::
+#
+#   INFO: METplus has successfully finished running.
+#
+# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated.
+# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt
+# (relative to **OUTPUT_BASE**)
+# and will contain the following files:
+#
+# * grid_stat_MPAS_to_ERA5_F36_CloudBaseHght_360000L_20200724_120000V_pairs.nc
+# * grid_stat_MPAS_to_ERA5_F36_CloudBaseHght_360000L_20200724_120000V.stat
+# * grid_stat_MPAS_to_ERA5_F36_CloudBaseHght_NBR_360000L_20200724_120000V_pairs.nc
+# * grid_stat_MPAS_to_ERA5_F36_CloudBaseHght_NBR_360000L_20200724_120000V.stat
+
+##############################################################################
+# Keywords
+# --------
+#
+# .. note::
+#
+#   * GridStatToolUseCase
+#   * NetCDFFileUseCase
+#   * AirQualityAndCompAppUseCase
+#   * PythonEmbeddingFileUseCase
+#
+# Navigate to the :ref:`quick-search` page to discover other similar use cases.
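+
+##############################################################################
+# Python Embedding Argument Sketch
+# --------------------------------
+#
+# GridStat invokes read_input_data.py with a single colon-separated argument
+# built from FCST_VAR1_NAME and OBS_VAR1_NAME in the use case configuration
+# file. The minimal sketch below, using a hypothetical file path, shows how
+# the script splits that string into the values it expects; it is illustrative
+# only and is not part of the use case itself::
+#
+#   name_string = "/path/to/diag.2020-07-24_12.00.00_latlon.nc:MPAS:cloudBaseHeight:2020072300:2020072412:1"
+#   data_file, data_source, variable, init_time, valid_time, flag = name_string.split(":")
+#   # flag "1" marks the field as the forecast; flag "2" marks it as the observation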
+# +# sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstMPAS_obsERA5_cloudBaseHgt.png' diff --git a/internal/tests/use_cases/all_use_cases.txt b/internal/tests/use_cases/all_use_cases.txt index f2148826c5..73e1838de5 100644 --- a/internal/tests/use_cases/all_use_cases.txt +++ b/internal/tests/use_cases/all_use_cases.txt @@ -68,6 +68,7 @@ Category: air_quality_and_comp 2::GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp::model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.conf:: pandac_env, py_embed 3::GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac::model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf:: pandac_env, py_embed 4::GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac::model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf:: pandac_env, py_embed +5::GridStat_fcstMPAS_obsERA5_cloudBaseHgt::model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf:: pandac_env, py_embed Category: climate diff --git a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf new file mode 100644 index 0000000000..46cef96d4f --- /dev/null +++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf @@ -0,0 +1,160 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.html + +# For additional information, please see the METplus Users Guide. +# https://metplus.readthedocs.io/en/latest/Users_Guide + +# ### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + + +PROCESS_LIST = GridStat, GridStat(nbr) + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +LOOP_BY = INIT +INIT_TIME_FMT = %Y%m%d%H +INIT_BEG=2020072300 +INIT_END=2020072300 +INIT_INCREMENT = 12H + +LEAD_SEQ = 36 + +LOOP_ORDER = times + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +FCST_GRID_STAT_INPUT_DIR = +FCST_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +OBS_GRID_STAT_INPUT_DIR = +OBS_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +GRID_STAT_CLIMO_MEAN_INPUT_DIR = +GRID_STAT_CLIMO_MEAN_INPUT_TEMPLATE = + +GRID_STAT_CLIMO_STDEV_INPUT_DIR = +GRID_STAT_CLIMO_STDEV_INPUT_TEMPLATE = + +GRID_STAT_OUTPUT_DIR = {OUTPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt +GRID_STAT_OUTPUT_TEMPLATE = + + +### +# Field Info +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#field-info +### + +MODEL = MPAS +OBTYPE = ERA5 + +CONFIG_DIR = {PARM_BASE}/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt + +FCST_VAR1_NAME = {CONFIG_DIR}/read_input_data.py 
{INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt/diag.{valid?fmt=%Y-%m-%d_%H}.00.00_latlon.nc:{MODEL}:cloudBaseHeight:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:1 +FCST_VAR1_LEVELS = +FCST_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + +OBS_VAR1_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt/ERA5_{valid?fmt=%Y%m%d%H}_Cld.nc:{OBTYPE}:cloudBaseHeight:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR1_LEVELS = +OBS_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + + +### +# GridStat Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#gridstat +### + +#LOG_GRID_STAT_VERBOSITY = 2 + +GRID_STAT_CONFIG_FILE = {PARM_BASE}/met_config/GridStatConfig_wrapped + +GRID_STAT_REGRID_TO_GRID = FCST +GRID_STAT_REGRID_METHOD = BILIN +GRID_STAT_REGRID_WIDTH = 2 + +GRID_STAT_DESC = + +FCST_GRID_STAT_FILE_WINDOW_BEGIN = 0 +FCST_GRID_STAT_FILE_WINDOW_END = 0 +OBS_GRID_STAT_FILE_WINDOW_BEGIN = 0 +OBS_GRID_STAT_FILE_WINDOW_END = 0 + +GRID_STAT_NEIGHBORHOOD_WIDTH = 1 +GRID_STAT_NEIGHBORHOOD_SHAPE = SQUARE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >=0.5 + +GRID_STAT_ONCE_PER_FIELD = False + +FCST_IS_PROB = false + +FCST_GRID_STAT_PROB_THRESH = ==0.1 + +OBS_IS_PROB = false + +OBS_GRID_STAT_PROB_THRESH = ==0.1 + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudBaseHght + +GRID_STAT_OUTPUT_FLAG_FHO = STAT +GRID_STAT_OUTPUT_FLAG_CTC = STAT +GRID_STAT_OUTPUT_FLAG_CTS = STAT +GRID_STAT_OUTPUT_FLAG_CNT = STAT +GRID_STAT_OUTPUT_FLAG_SL1L2 = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = STAT + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_CLIMO = FALSE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_MASK_POLY = {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt/GPP_17km_60S_60N_mask.nc + +[nbr] + +FCST_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SFP20, >SFP30, >SFP40, >SFP50, >SFP60, >SFP70, >SFP80 + +OBS_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SOP20, >SOP30, >SOP40, >SOP50, >SOP60, >SOP70, >SOP80 + +GRID_STAT_NEIGHBORHOOD_WIDTH = 3, 5, 7, 9 +GRID_STAT_NEIGHBORHOOD_SHAPE = CIRCLE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >0.0 + +GRID_STAT_OUTPUT_FLAG_FHO = NONE +GRID_STAT_OUTPUT_FLAG_CTC = NONE +GRID_STAT_OUTPUT_FLAG_CTS = NONE +GRID_STAT_OUTPUT_FLAG_CNT = NONE +GRID_STAT_OUTPUT_FLAG_SL1L2 = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTC = STAT +GRID_STAT_OUTPUT_FLAG_NBRCTS = STAT +GRID_STAT_OUTPUT_FLAG_NBRCNT = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = NONE + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_NBRHD = TRUE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudBaseHght_NBR + diff --git a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt/read_input_data.py b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt/read_input_data.py new file mode 100755 index 0000000000..aa436135a6 --- /dev/null +++ 
b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt/read_input_data.py @@ -0,0 +1,911 @@ +#this code was provided by Craig Schwartz +#and is largely unaltered from its original +#function. + +#from __future__ import print_function +import os +import sys +import numpy as np +import datetime as dt +from netCDF4 import Dataset # http://code.google.com/p/netcdf4-python/ +from scipy.interpolate import NearestNDInterpolator, LinearNDInterpolator +#### for Plotting +import matplotlib.cm as cm +import matplotlib.axes as maxes +import matplotlib.pyplot as plt +from mpl_toolkits.axes_grid1 import make_axes_locatable +#from mpl_toolkits.basemap import Basemap +import fnmatch +import pygrib +import pickle as pk +##### + +########################################### + +missing_values = -9999.0 # for MET + +# UPP top layer bounds (Pa) for cloud layers +PTOP_LOW_UPP = 64200. # low for > 64200 Pa +PTOP_MID_UPP = 35000. # mid between 35000-64200 Pa +PTOP_HIGH_UPP = 15000. # high between 15000-35000 Pa + +# Values for 4 x 4 contingency table +Na, Nb, Nc, Nd = 1, 2, 3, 4 +Ne, Nf, Ng, Nh = 5, 6, 7, 8 +Ni, Nj, Nk, Nl = 9, 10, 11, 12 +Nm, Nn, No, Np = 13, 14, 15, 16 + +# Notes: +# 1) Entry for 'point' is for point-to-point comparison and is all dummy data (except for gridType) that is overwritten by point2point +# 2) ERA5 on NCAR CISL RDA changed at some point. Old is ERA5_2017 (not used anymore), new is ERA5, which we'll use for 2020 data +griddedDatasets = { + 'MERRA2' : { 'gridType':'LatLon', 'latVar':'lat', 'latDef':[-90.0,0.50,361], 'lonVar':'lon', 'lonDef':[-180.0,0.625,576], 'flipY':True, 'ftype':'nc'}, + 'SATCORPS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[-180.0,0.3125,1152], 'flipY':False, 'ftype':'nc' }, + 'ERA5_2017': { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.7848769072,0.281016829130516,640], 'lonVar':'longitude', 'lonDef':[0.0,0.28125,1280], 'flipY':False, 'ftype':'nc' }, + 'ERA5' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'GFS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'GALWEM' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':True, 'ftype':'grib'}, + 'GALWEM17' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.921875,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'grib'}, + 'WWMCA' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'MPAS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc'}, + 'SAT_WWMCA_MEAN' : { 'gridType':'LatLon', 'latVar':'lat','latDef':[-90.0,0.25,721], 'lonVar':'lon', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'point' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'nc'}, +} + #TODO:Correct one, but MET can ingest a Gaussian grid only in Grib2 format (from Randy B.) 
+ #'ERA5' : { 'gridType':'Gaussian', 'nx':1280, 'ny':640, 'lon_zero':0, 'latVar':'latitude', 'lonVar':'longitude', 'flipY':False, }, + +#GALWEM, both 17-km and 0.25-degree +lowCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':3, 'typeOfFirstFixedSurface':10, 'shortName':'lcc' } +midCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':4, 'typeOfFirstFixedSurface':10, 'shortName':'mcc' } +highCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':5, 'typeOfFirstFixedSurface':10, 'shortName':'hcc' } +totalCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':10, 'shortName':'tcc' } +cloudTopHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':12, 'typeOfFirstFixedSurface':3, 'shortName':'cdct' } +cloudBaseHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':11, 'typeOfFirstFixedSurface':2, 'shortName':'cdcb' } + +#GFS +lowCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':214, 'shortName':'tcc' } +midCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':224, 'shortName':'tcc' } +highCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':234, 'shortName':'tcc' } + +#WWMCA +totalCloudFrac_WWMCA = { 'parameterName':71, 'typeOfLevel':'entireAtmosphere', 'level':0 } + +cloudTopHeightLev1_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':1 } +cloudTopHeightLev2_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':2 } +cloudTopHeightLev3_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':3 } +cloudTopHeightLev4_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':4 } +cloudTopHeight_WWMCA = [ cloudTopHeightLev1_WWMCA, cloudTopHeightLev2_WWMCA, cloudTopHeightLev3_WWMCA, cloudTopHeightLev4_WWMCA ] + +cloudBaseHeightLev1_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':1 } +cloudBaseHeightLev2_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':2 } +cloudBaseHeightLev3_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':3 } +cloudBaseHeightLev4_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':4 } +cloudBaseHeight_WWMCA = [ cloudBaseHeightLev1_WWMCA, cloudBaseHeightLev2_WWMCA, cloudBaseHeightLev3_WWMCA, cloudBaseHeightLev4_WWMCA ] + +verifVariablesModel = { + 'binaryCloud' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'totalCloudFrac' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'lowCloudFrac' : {'GFS':[lowCloudFrac_GFS], 'GALWEM17':[lowCloudFrac_GALWEM], 'GALWEM':[lowCloudFrac_GALWEM], 'MPAS':['cldfrac_low_UM']}, + 'midCloudFrac' : {'GFS':[midCloudFrac_GFS], 'GALWEM17':[midCloudFrac_GALWEM], 'GALWEM':[midCloudFrac_GALWEM], 'MPAS':['cldfrac_mid_UM']}, + 'highCloudFrac' : {'GFS':[highCloudFrac_GFS], 'GALWEM17':[highCloudFrac_GALWEM], 'GALWEM':[highCloudFrac_GALWEM], 'MPAS':['cldfrac_high_UM']}, + 'cloudTopHeight' : {'GFS':[''] , 'GALWEM17':[cloudTopHeight_GALWEM], 'GALWEM':[cloudTopHeight_GALWEM], 'MPAS':['cldht_top_UM']}, + 'cloudBaseHeight' : {'GFS':[''] , 'GALWEM17':[cloudBaseHeight_GALWEM], 'GALWEM':[cloudBaseHeight_GALWEM], 'MPAS':['cldht_base_UM']}, +} + +cloudFracCatThresholds = '>0, <10.0, >=10.0, >=20.0, >=30.0, >=40.0, >=50.0, >=60.0, >=70.0, >=80.0, >=90.0' # MET format string +brightnessTempThresholds = '<280.0, <275.0, <273.15, <270.0, <265.0, <260.0, <255.0, <250.0, <245.0, 
<240.0, <235.0, <230.0, <225.0, <220.0, <215.0, <210.0, <=SFP1, <=SFP5, <=SFP10, <=SFP25, <=SFP50, >=SFP50, >=SFP75, >=SFP90, >=SFP95, >=SFP99' +verifVariables = { + 'binaryCloud' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['TCC'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'NA', 'thresholds':'>0.0', 'interpMethod':'nearest' }, + 'totalCloudFrac' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['tcc'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'lowCloudFrac' : { 'MERRA2':['CLDLOW'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['lcc'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'midCloudFrac' : { 'MERRA2':['CLDMID'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['MCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'highCloudFrac' : { 'MERRA2':['CLDHGH'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['HCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'cloudTopTemp' : { 'MERRA2':['CLDTMP'], 'SATCORPS':['cloud_temperature_top_level'], 'ERA5':[''] , 'units':'K', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopPres' : { 'MERRA2':['CLDPRS'], 'SATCORPS':['cloud_pressure_top_level'], 'ERA5':[''] , 'units':'hPa', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopHeight' : { 'MERRA2':[''] , 'SATCORPS':['cloud_height_top_level'], 'ERA5':[''] , 'WWMCA':cloudTopHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudBaseHeight': { 'MERRA2':[''] , 'SATCORPS':['cloud_height_base_level'], 'ERA5':['cbh'], 'WWMCA':cloudBaseHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudCeiling' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'m', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'brightnessTemp' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'K', 'thresholds':brightnessTempThresholds, 'interpMethod':'bilin'}, +} + +# Combine the two dictionaries +# Only reason verifVariablesModel exists is just for space--verifVaribles gets too long if we keep adding more datasets +for key in verifVariablesModel.keys(): + x = verifVariablesModel[key] + for key1 in x.keys(): + verifVariables[key][key1] = x[key1] + +#f = '/glade/u/home/schwartz/cloud_verification/GFS_grib_0.25deg/2018112412/gfs.0p25.2018112412.f006.grib2' +#grbs = pygrib.open(f) +#idx = pygrib.index(f,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') +#model = 'GFS' +#variable = 'totCloudCover' +#x = verifVariablesModel[variable][model] # returns a list, whose ith element is a dictionary +# e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) +#idx(parameterCategory=x[0]['parameterCategory'],parameterNumber=x[0]['parameterNumber'],typeOfFirstFixedSurface=x[0]['typeOfFirstFixedSurface']) + +# to read in an environmental variable +#x = os.getenv('a') # probably type string no matter what + +########### + +def getThreshold(variable): + x = verifVariables[variable]['thresholds'] + print(x) # needed for python 3 to read variable into csh variable + return x + +def getInterpMethod(variable): + x = verifVariables[variable]['interpMethod'].upper() + print(x) # needed for python 3 to read variable into csh variable + return x + +def getTotalCloudFrac(source,data): + if source == 'SATCORPS': + # x = data[0][0,:,:,0] * 1.0E-2 # scaling + x = 
(data[0][0,:,:,1] + data[0][0,:,:,2] + data[0][0,:,:,3])*1.0E-2 # scaling + # y = data[0] + # x = np.sum( y[:,:,:,1:4],axis=3) + elif source == 'MERRA2': +# x = ( data[0][0,:,:]+data[1][0,:,:]+data[2][0,:,:] ) *100.0 # the ith element of data is a numpy array + x = data[0][0,:,:] * 100.0 # the ith element of data is a numpy array + print(x.min(), x.max()) + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + elif source == 'SAT_WWMCA_MEAN': + x = data[0][0,:,:] # already in % + else: + x = data[0] + + # This next line is WRONG. + # Missing should be set to missing + # Then, the non-missing values are 1s and 0s + #output = np.where(x > 0.0, x, 0.0) + #output = np.where(x < 0.0, -9999.0, x) # missing. currently used for SATCORPS + + x = np.where( x < 0.0 , 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + return x + +def getBinaryCloud(source,data): + y = getTotalCloudFrac(source,data) + # keep NaNs as is, but then set everything else to either 100% or 0% + x = np.where( np.isnan(y), y, np.where(y > 0.0, 100.0, 0.0) ) + return x + +def getLayerCloudFrac(source,data,layer): + if source == 'SATCORPS': + if layer.lower().strip() == 'low' : i = 1 + if layer.lower().strip() == 'mid' : i = 2 + if layer.lower().strip() == 'high' : i = 3 + x = data[0][0,:,:,i] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 100.0 + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + else: + x = data[0] + + x = np.where( x < 0.0, 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + + return x + +def getCloudTopTemp(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopPres(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-1 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 1.0E-2 # scaling [Pa] -> [hPa] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud top height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmax(tmp,axis=0) # get maximum cloud top height across all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud top shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def getCloudBaseHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = 
data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud base height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmin(tmp,axis=0) # get lowest cloud base over all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud base shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def getCloudCeiling(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] #TBD + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] # TBD + except: x = data[0][0,:,:] + return x + +# add other functions for different variables + +########### + +def getDataArray(inputFile,source,variable,dataSource): + # 1) inputFile: File name--either observations or forecast + # 2) source: Obsevation source (e.g., MERRA, SATCORP, etc.) + # 3) variable: Variable to verify + # 4) dataSource: If 1, process forecast file. If 2 process obs file. + +# # specifying names here temporarily. file names should be passed in to python from shell script +# if source == 'merra': nc_file = '/gpfs/fs1/scratch/schwartz/MERRA/MERRA2_400.tavg1_2d_rad_Nx.20181101.nc4' +# elif source == 'satcorp': nc_file = '/glade/scratch/bjung/met/test_satcorps/GEO-MRGD.2018334.0000.GRID.NC' +# elif source == 'era5': nc_file = '/glade/scratch/bjung/met/test_era5/e5.oper.fc.sfc.instan.128_164_tcc.regn320sc.2018111606_2018120112.nc' + + source = source.upper().strip() # Force uppercase and get rid of blank spaces, for safety + + print('dataSource = ',dataSource) + + ftype = griddedDatasets[source]['ftype'].lower().strip() + + # Get file handle + if ftype == 'nc': + nc_fid = Dataset(inputFile, "r", format="NETCDF4") + #nc_fid.set_auto_scale(True) + elif ftype == 'grib': + if source == 'WWMCA': + idx = pygrib.index(inputFile,'parameterName','typeOfLevel','level') + else: + idx = pygrib.index(inputFile,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') + + # dataSource == 1 means forecast, 2 means obs +# if dataSource == 1: varsToRead = verifVariablesModel[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list +# if dataSource == 2: varsToRead = verifVariables[variable][source] # returns a list + varsToRead = verifVariables[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list + + print('Trying to read ',inputFile) + + # Get lat/lon information--currently not used + #latVar = griddedDatasets[source]['latVar'] + #lonVar = griddedDatasets[source]['lonVar'] + #lats = np.array(nc_fid.variables[latVar][:]) # extract/copy the data + #lons = np.array(nc_fid.variables[lonVar][:] ) + + #print(lats.max()) + #print(lons.max()) + + # one way to deal with scale factors + # probably using something like nc_fid.set_auto_scale(True) is better... 
+ #latMax = lats.max() + #while latMax > 90.0: + # lons = lons * 0.1 + # lats = lats * 0.1 + # latMax = lats.max() + + # get data + data = [] + for v in varsToRead: + if ftype == 'grib': + if source == 'WWMCA': + x = idx(parameterName=v['parameterName'],typeOfLevel=v['typeOfLevel'],level=v['level'])[0] # by getting element 0, you get a pygrib message + else: + # e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) + if ( variable == 'cloudTopHeight' or variable == 'cloudBaseHeight') and source == 'GALWEM17': + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[1] # by getting element 1, you get a pygrib message + else: + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[0] # by getting element 0, you get a pygrib message + if x.shortName != v['shortName']: print('Name mismatch!') + #ADDED BY JOHN O + print(x) + print('Reading ', x.shortName, 'at level ', x.typeOfFirstFixedSurface) + read_var = x.values # same x.data()[0] + read_missing = x.missingValue + print('missing value = ',read_missing) + + # The missing value (read_missing) for GALWEM17 and GALWEM cloud base/height is 9999, which is not the best choice because + # those could be actual values. So we need to use the masked array part (below) to handle which + # values are missing. We also set read_missing to something unphysical to essentially disable it. + # Finally, if we don't change the 'missingValue' property in the GRIB2 file we are eventually outputting, + # the bitmap will get all messed up, because it will be based on 9999 instead of $missing_values + if variable == 'cloudTopHeight' or variable == 'cloudBaseHeight': + read_missing = -9999. + x['missingValue'] = read_missing + if source == 'GALWEM17': + #These are masked numpy arrays, with mask = True where there is a missing value (no cloud) + #Use np.ma.filled to create an ndarray where mask = True values are set to np.nan + read_var = np.ma.filled(read_var.astype(read_var.dtype), np.nan) + elif ftype == 'nc': + read_var = nc_fid.variables[v] # extract/copy the data + try: + read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + except: + read_missing = -9999. # set a default missing value. probably only need to do this for MPAS + + print('Reading ', v) + + this_var = np.array( read_var ) # to numpy array + #print(read_missing, np.nan) + this_var = np.where( this_var==read_missing, np.nan, this_var ) + #print(this_var.shape) + data.append(this_var) # ith element of the list is a NUMPY ARRAY for the ith variable + #print(type(this_var)) + #print(type(data)) + + # Call a function to get the variable of interest. 
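+   # Each get* helper below returns a 2-D array in the units listed in verifVariables
+   # (e.g., meters for cloudBaseHeight); some helpers also set unphysical values to np.nan.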
+ # Add a new function for each variable + if variable == 'binaryCloud': raw_data = getBinaryCloud(source,data) + if variable == 'totalCloudFrac': raw_data = getTotalCloudFrac(source,data) + if variable == 'lowCloudFrac': raw_data = getLayerCloudFrac(source,data,'low') + if variable == 'midCloudFrac': raw_data = getLayerCloudFrac(source,data,'mid') + if variable == 'highCloudFrac': raw_data = getLayerCloudFrac(source,data,'high') + if variable == 'cloudTopTemp': raw_data = getCloudTopTemp(source,data) + if variable == 'cloudTopPres': raw_data = getCloudTopPres(source,data) + if variable == 'cloudTopHeight': raw_data = getCloudTopHeight(source,data) + if variable == 'cloudBaseHeight': raw_data = getCloudBaseHeight(source,data) + if variable == 'cloudCeiling': raw_data = getCloudCeiling(source,data) + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + # Array met_data is passed to MET + # Graphics should plot $met_data to make sure things look correct + if griddedDatasets[source]['flipY']: + print('flipping ',source,' data about y-axis') + met_data=np.flip(raw_data,axis=0).astype(float) + else: + met_data=raw_data.astype(float) + + # Make plotting optional or Just use plot_data_plane +# plt_data=np.where(met_data<0, np.nan, met_data) +# map=Basemap(projection='cyl',llcrnrlat=-90,urcrnrlat=90,llcrnrlon=-180,urcrnrlon=180,resolution='c') +# map.drawcoastlines() +# map.drawcountries() +# map.drawparallels(np.arange(-90,90,30),labels=[1,1,0,1]) +# map.drawmeridians(np.arange(0,360,60),labels=[1,1,0,1]) +# plt.contourf(lons,lats,plt_data,20,origin='upper',cmap=cm.Greens) #cm.gist_rainbow) +# title=source+"_"+variable+"_"+str(validTime) +# plt.title(title) +# plt.colorbar(orientation='horizontal') +# plt.savefig(title+".png") + + # If a forecast file, output a GRIB file with + # 1 record containing the met_data + # This is a hack, because right now, MET python embedding doesn't work with pygrib, + # so output the data to a temporary file, and then have MET read the temporary grib file. + # Starting with version 9.0 of MET, the hack isn't needed, and MET python embedding works with pygrib + outputFcstFile = False # MUST be True for MET version < 9.0. 
For MET 9.0+, optional + if dataSource == 1 and ftype == 'grib': + if outputFcstFile: + grbtmp = x + grbtmp['values']=met_data + grbout = open('temp_fcst.grb2','ab') + grbout.write(grbtmp.tostring()) + grbout.close() # Close the outfile GRIB file + print('Successfully output temp_fcst.grb2') + + # Close files + if ftype == 'grib': idx.close() # Close the input GRIB file + if ftype == 'nc': nc_fid.close() # Close the netCDF file + + return met_data + +def obsError(fcstData,obsErrorFile,validDate,dataSource): + + print('Adding noise to the cloud fraction fields') + print('Using obsErrorFile',obsErrorFile) + + # First load the obsError information + #obsErrorFile = 'ob_errors.pk' + infile = open(obsErrorFile,'rb') + binEdges, binStddev = pk.load(infile) # 'numpy.ndarray' types + infile.close() + + # Get 1d forecast data + shape = fcstData.shape + fcst = fcstData.flatten() + + # Set random number seed based on valid time and model + if dataSource.upper().strip() == 'MPAS': ii = 10 + elif dataSource.upper().strip() == 'GALWEM': ii = 20 + elif dataSource.upper().strip() == 'GFS': ii = 30 + np.random.seed(int(validDate*.1 + ii)) + + # Find which bin the data is in + for i in range(0,len(binEdges)-1): + idx = np.where( (fcst >= binEdges[i]) & (fcst < binEdges[i+1]) )[0] + n = len(idx) # number of points in the ith bin + if n > 0: # check for empty bins + randVals = np.random.normal(0,binStddev[i],n) + fcst[idx] = fcst[idx] + randVals + + # bound forecast values to between 0 and 100% + fcst = np.where( fcst < 0.0, 0.0, fcst) + fcst = np.where( fcst > 100.0, 100.0, fcst) + + # now reshape forecast data back to 2D + output = fcst.reshape(shape) + + # data will have NaNs where bad. + return output + +def getFcstCloudFrac(cfr,pmid,psfc,layerDefinitions): # cfr is cloud fraction(%), pmid is 3D pressure(Pa), psfc is surface pressure (Pa) code from UPP ./INITPOST.F + + if pmid.shape != cfr.shape: # sanity check + print('dimension mismatch bewteen cldfra and pressure') + sys.exit() + + nlocs, nlevs = pmid.shape + + if len(psfc) != nlocs: # another sanity check + print('dimension mismatch bewteen cldfra and surface pressure') + sys.exit() + + cfracl = np.zeros(nlocs) + cfracm = np.zeros(nlocs) + cfrach = np.zeros(nlocs) + + for i in range(0,nlocs): + + PTOP_HIGH = PTOP_HIGH_UPP + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc[i] + PTOP_MID = 0.45*psfc[i] + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP + PTOP_MID = PTOP_MID_UPP + + idxLow = np.where( pmid[i,:] >= PTOP_LOW)[0] # using np.where with just 1 argument returns tuple + idxMid = np.where( (pmid[i,:] < PTOP_LOW) & (pmid[i,:] >= PTOP_MID))[0] + idxHigh = np.where( (pmid[i,:] < PTOP_MID) & (pmid[i,:] >= PTOP_HIGH))[0] + + # use conditions in case all indices are missing + if (len(idxLow) >0 ): cfracl[i] = np.max( cfr[i,idxLow] ) + if (len(idxMid) >0 ): cfracm[i] = np.max( cfr[i,idxMid] ) + if (len(idxHigh) >0 ): cfrach[i] = np.max( cfr[i,idxHigh] ) + + tmp = np.vstack( (cfracl,cfracm,cfrach)) # stack the rows into one 2d array + cldfraMax = np.max(tmp,axis=0) # get maximum value across low/mid/high for each pixel (minimum overlap assumption) + + # This is the fortran code put into python format...double loop unnecessary and slow + #for i in range(0,nlocs): + # for k in range(0,nlevs): + # if pmid(i,k) >= PTOP_LOW: + # cfracl(i) = np.max( [cfracl(i),cfr(i,k)] ) # Low + # elif pmid(i,k) < PTOP_LOW and pmid(i,k) >= PTOP_MID: + # cfracm(i) = np.max( [cfracm(i),cfr(i,k)] ) # Mid + # elif pmid(i,k) < PTOP_MID 
and pmid(i,k) >= PTOP_HIGH: # High + # cfrach(i) = np.max( [cfrach(i),cfr(i,k)] ) + + return cfracl, cfracm, cfrach, cldfraMax + +def getGOES16LatLon(g16_data_file): + + # Start timer + startTime = dt.datetime.utcnow() + + # designate dataset + g16nc = Dataset(g16_data_file, 'r') + + # GOES-R projection info and retrieving relevant constants + proj_info = g16nc.variables['goes_imager_projection'] + lon_origin = proj_info.longitude_of_projection_origin + H = proj_info.perspective_point_height+proj_info.semi_major_axis + r_eq = proj_info.semi_major_axis + r_pol = proj_info.semi_minor_axis + + # Data info + lat_rad_1d = g16nc.variables['x'][:] + lon_rad_1d = g16nc.variables['y'][:] + + # close file when finished + g16nc.close() + g16nc = None + + # create meshgrid filled with radian angles + lat_rad,lon_rad = np.meshgrid(lat_rad_1d,lon_rad_1d) + + # lat/lon calc routine from satellite radian angle vectors + + lambda_0 = (lon_origin*np.pi)/180.0 + + a_var = np.power(np.sin(lat_rad),2.0) + (np.power(np.cos(lat_rad),2.0)*(np.power(np.cos(lon_rad),2.0)+(((r_eq*r_eq)/(r_pol*r_pol))*np.power(np.sin(lon_rad),2.0)))) + b_var = -2.0*H*np.cos(lat_rad)*np.cos(lon_rad) + c_var = (H**2.0)-(r_eq**2.0) + + r_s = (-1.0*b_var - np.sqrt((b_var**2)-(4.0*a_var*c_var)))/(2.0*a_var) + + s_x = r_s*np.cos(lat_rad)*np.cos(lon_rad) + s_y = - r_s*np.sin(lat_rad) + s_z = r_s*np.cos(lat_rad)*np.sin(lon_rad) + + lat = (180.0/np.pi)*(np.arctan(((r_eq*r_eq)/(r_pol*r_pol))*((s_z/np.sqrt(((H-s_x)*(H-s_x))+(s_y*s_y)))))) + lon = (lambda_0 - np.arctan(s_y/(H-s_x)))*(180.0/np.pi) + + # End timer + endTime = dt.datetime.utcnow() + time = (endTime - startTime).microseconds / (1000.0*1000.0) + print('took %f4.1 seconds to get GOES16 lat/lon'%(time)) + + return lon,lat # lat/lon are 2-d arrays + +# -- +def getGOESRetrivalData(goesFile,goesVar): + + if not os.path.exists(goesFile): + print(goesFile+' not there. exit') + sys.exit() + + # First get GOES lat/lon + goesLon2d, goesLat2d = getGOES16LatLon(goesFile) # 2-d arrays + goesLon = goesLon2d.flatten() # 1-d arrays + goesLat = goesLat2d.flatten() + + # Now open the file and get the data we want + nc_goes = Dataset(goesFile, "r", format="NETCDF4") + + # If the next line is true (it should be), this indicates the variable needs to be treated + # as an "unsigned 16-bit integer". This is a pain. So we must use the "astype" method + # to change the variable type BEFORE applying scale_factor and add_offset. 
After the conversion + # we then can manually apply the scale factor and offset + #goesVar = 'PRES' + goesVar = goesVar.strip() # for safety + if nc_goes.variables[goesVar]._Unsigned.lower().strip() == 'true': + nc_goes.set_auto_scale(False) # Don't automatically apply scale_factor and add_offset to variable + goesData2d = np.array( nc_goes.variables[goesVar]).astype(np.uint16) + goesData2d = goesData2d * nc_goes.variables[goesVar].scale_factor + nc_goes.variables[goesVar].add_offset + goesQC2d = np.array( nc_goes.variables['DQF']).astype(np.uint8) + else: + goesData2d = np.array( nc_goes.variables[goesVar]) + goesQC2d = np.array( nc_goes.variables['DQF']) + + # Make variables 1-d + goesQC = goesQC2d.flatten() + goesData = goesData2d.flatten() + nc_goes.close() + + # Get rid of NaNs; base it on longitude + goesData = goesData[~np.isnan(goesLon)] # Handle data arrays first before changing lat/lon itself + goesQC = goesQC[~np.isnan(goesLon)] + goesLon = goesLon[~np.isnan(goesLon)] # ~ is "logical not", also np.logical_not + goesLat = goesLat[~np.isnan(goesLat)] + if goesLon.shape != goesLat.shape: + print('GOES lat/lon shape mismatch') + sys.exit() + + # If goesQC == 0, good QC and there was a cloud with a valid pressure. + # If goesQC == 4, no cloud; probably clear sky. + # All other QC means no data, and we want to remove those points + idx = np.logical_or( goesQC == 0, goesQC == 4) # Only keep QC == 0 or 4 + goesData = goesData[idx] + goesQC = goesQC[idx] + goesLon = goesLon[idx] + goesLat = goesLat[idx] + + # Only QC with 0 or 4 are left; now set QC == 4 to missing to indicate clear sky + goesData = np.where( goesQC != 0, missing_values, goesData) + + # Get longitude to between (0,360) for consistency with JEDI files (this check is applied to JEDI files, too) + goesLon = np.where( goesLon < 0, goesLon + 360.0, goesLon ) + + print('Min GOES Lon = ',np.min(goesLon)) + print('Max GOES Lon = ',np.max(goesLon)) + + return goesLon, goesLat, goesData + +def point2point(source,inputDir,satellite,channel,goesFile,condition,layerDefinitions,dataSource): + + # Static Variables for QC and obs + qcVar = 'brightness_temperature_'+str(channel)+'@EffectiveQC' #'@EffectiveQC0' # QC variable + obsVar = 'brightness_temperature_'+str(channel)+'@ObsValue' # Observation variable + + # Get GOES-16 retrieval file with auxiliary information + if 'abi' in satellite or 'ahi' in satellite: + goesLon, goesLat, goesData = getGOESRetrivalData(goesFile,'PRES') # return 1-d arrays + lonlatGOES = np.array( list(zip(goesLon, goesLat))) # lon/lat pairs for each GOES ob (nobs_GOES, 2) + #print('shape lonlatGOES = ',lonlatGOES.shape) + print('getting data from ',goesFile) + myGOESInterpolator = NearestNDInterpolator(lonlatGOES,goesData) + + # First check to see if there's a concatenated file with all obs. + # If so, use that. If not, have to process one file per processor, which takes a lot more time + if os.path.exists(inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'): + inputFiles = [inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'] # needs to be in a list since we loop over inputFiles + else: + # Get list of OMB files to process. There is one file per processor. + # Need to get them in order so they are called in the same order for the + # forecast and observed passes through this subroutine. 
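+      # The pattern match below returns basenames only, so the directory name is prepended afterwards;
+      # sorting keeps the file order identical between the forecast and observation passes.
+      # If no files match, the (-99999, -99999) return below acts as a failure sentinel for the caller.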
+ files = os.listdir(inputDir) + inputFiles = fnmatch.filter(files,'obsout*_'+satellite+'*nc4') # returns relative path names + inputFiles = [inputDir+'/'+s for s in inputFiles] # add on directory name + inputFiles.sort() # Get in order from low to high + if len(inputFiles) == 0: return -99999, -99999 # if no matching files, force a failure + + # Variable to pull for brightness temperature +# if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@GsiHofXBc' # Forecast variable + if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@hofx' #'@depbg' # OMB + if dataSource == 2: v = obsVar + + # Read the files and put data in array + allData, allDataQC = [], [] + for inputFile in inputFiles: + nc_fid = Dataset(inputFile, "r", format="NETCDF4") #Dataset is the class behavior to open the file + print('Trying to read ',v,' from ',inputFile) + + # Read forecast/obs data + read_var = nc_fid.variables[v] # extract/copy the data + # read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + this_var = np.array( read_var ) # to numpy array + # this_var = np.where( this_var==read_missing, np.nan, this_var ) + + #if dataSource == 1: # If true, we just read in OMB data, but we want B + # obsData = np.array( nc_fid.variables[obsVar]) + # this_var = obsData - this_var # get background/forecast value (O - OMB = B) + + #Read QC data + qcData = np.array(nc_fid.variables[qcVar]) + + # Sanity check...shapes should match + if qcData.shape != this_var.shape: return -99999, -99999 + + if 'abi' in satellite or 'ahi' in satellite: + + # Get the GOES-16 retrieval data at the observation locations in this file + # GOES values < 0 mean clear sky + lats = np.array(nc_fid.variables['latitude@MetaData']) + lons = np.array(nc_fid.variables['longitude@MetaData']) + + # Get longitude to between (0,360) for consistency with GOES-16 files + lons = np.where( lons < 0, lons + 360.0, lons ) + + lonlat = np.array( list(zip(lons,lats))) # lon/lat pairs for each ob (nobs, 2) + thisGOESData = myGOESInterpolator(lonlat) # GOES data at obs locations in this file. If pressure, units are hPa + thisGOESData = thisGOESData * 100.0 # get into Pa + + #obsCldfra = np.array( nc_fid.variables['cloud_area_fraction@MetaData'] )*100.0 # Get into %...observed cloud fraction (AHI/ABI only) + + geoValsFile = inputFile.replace('obsout','geoval') + if not os.path.exists(geoValsFile): + print(geoValsFile+' not there. exit') + sys.exit() + + nc_fid2 = Dataset(geoValsFile, "r", format="NETCDF4") + fcstCldfra = np.array( nc_fid2.variables['cloud_area_fraction_in_atmosphere_layer'])*100.0 # Get into % + pressure = np.array( nc_fid2.variables['air_pressure']) # Pa + pressure_edges = np.array( nc_fid2.variables['air_pressure_levels']) # Pa + psfc = pressure_edges[:,-1] # Surface pressure (Pa)...array order is top down + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc # these are arrays + PTOP_MID = 0.45*psfc + PTOP_HIGH = PTOP_HIGH_UPP * np.ones_like(psfc) + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP # these are constants + PTOP_MID = PTOP_MID_UPP + PTOP_HIGH = PTOP_HIGH_UPP + else: + print('layerDefinitions = ',layerDefinitions,'is invalid. exit') + sys.exit() + fcstLow,fcstMid,fcstHigh,fcstTotCldFra = getFcstCloudFrac(fcstCldfra,pressure,psfc,layerDefinitions) # get low/mid/high/total forecast cloud fractions for each ob + nc_fid2.close() + + # Modify QC data based on correspondence between forecast and obs. 
qcData used to select good data later + # It's possible that there are multiple forecast layers, such that fcstLow,fcstMid,fcstHigh are all > $cldfraThresh + # However, GOES-16 CTP doesn't really account for layering. So, we need to remove layered clouds from the forecast, + # focusing only on the layers that we asked for when doing {low,mid,high}Only conditions + # The "|" is symbol for "np.logcal_or" + yes = 2.0 + no = 0.0 + cldfraThresh = 20.0 # percent + if qcData.shape == fcstTotCldFra.shape == thisGOESData.shape: # these should all match + print('Using condition ',condition,'for ABI/AHI') + + # Note that "&" is "np.logical_and" for boolean (true/false) quantities. + # Thus, each condition should be enclosed in parentheses + if condition.lower().strip() == 'clearOnly'.lower(): # clear in both forecast and obs + qcData = np.where( (fcstTotCldFra < cldfraThresh) & (thisGOESData <= 0.0), qcData, missing_values) + elif condition.lower().strip() == 'cloudyOnly'.lower(): # cloudy in both forecast and obs + qcData = np.where( (fcstTotCldFra >= cldfraThresh) & (thisGOESData > 0.0), qcData, missing_values) + elif condition.lower().strip() == 'lowOnly'.lower(): # low clouds in both forecast and obs + fcstLow = np.where( (fcstMid >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstLow) # remove mid, high + qcData = np.where( (fcstLow >= cldfraThresh) & ( thisGOESData >= PTOP_LOW), qcData, missing_values) + elif condition.lower().strip() == 'midOnly'.lower(): # mid clouds in both forecast and obs + fcstMid = np.where( (fcstLow >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstMid) # remove low, high + qcData = np.where( (fcstMid >= cldfraThresh) & (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), qcData, missing_values) + elif condition.lower().strip() == 'highOnly'.lower(): # high clouds in both forecast and obs + fcstHigh = np.where( (fcstLow >= cldfraThresh) | ( fcstMid >= cldfraThresh), missing_values, fcstHigh) # remove mid, high + qcData = np.where( (fcstHigh >= cldfraThresh) & (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), qcData, missing_values) + elif condition.lower().strip() == 'fcstLow'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstLow >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'fcstMid'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstMid >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'fcstHigh'.lower(): # low clouds in forecast (layers possible); obs could be anything + qcData = np.where( fcstHigh >= cldfraThresh , qcData, missing_values) + elif condition.lower().strip() == 'cloudEventLow'.lower(): + if dataSource == 1: this_var = np.where( fcstLow >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( thisGOESData >= PTOP_LOW, yes, no ) + elif condition.lower().strip() == 'cloudEventMid'.lower(): + if dataSource == 1: this_var = np.where( fcstMid >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), yes, no ) + elif condition.lower().strip() == 'cloudEventHigh'.lower(): + if dataSource == 1: this_var = np.where( fcstHigh >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: 
this_var = np.where( (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), yes, no ) + elif condition.lower().strip() == 'cloudEventTot'.lower(): + if dataSource == 1: this_var = np.where( fcstTotCldFra >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET + if dataSource == 2: this_var = np.where( thisGOESData > 0.0, yes, no ) + elif condition.lower().strip() == 'all': + print("not doing any conditional verification or stratifying by event") + else: + print("condition = ",condition," not recognized.") + sys.exit() + #elif condition.lower().strip() == '4x4table'.lower(): + #if dataSource == 1: + # this_var = np.where( fcstLow >= cldfraThresh, yesLow, no ) + # this_var = this_var + np.where( fcstMid >= cldfraThresh, yesMid, no ) + # this_var = this_var + np.where( fcstHigh >= cldfraThresh, yesHigh, no ) + print('number removed = ', (qcData==missing_values).sum()) + #print('number passed = ', qcData.shape[0] - (qcData==missing_values).sum()) + else: + print('shape mismatch') + return -99999, -99999 + + # Append to arrays + allData.append(this_var) + allDataQC.append(qcData) + + nc_fid.close() # done with the file, so close it before going to next file in loop + + # We're now all done looping over the individul files + + # Get the indices with acceptable QC + allQC = np.concatenate(allDataQC) # Put list of numpy arrays into a single long 1-D numpy array. All QC data. + idx = np.where(allQC==0) # returns indices + + # Now get all the forecast/observed brightness temperature data with acceptable QC + this_var = np.concatenate(allData)[idx] # Put list of numpy arrays into a single long 1-D numpy array. This is all the forecast/obs data with good QC + numObs = this_var.shape[0] # number of points with good QC for this channel + print('Number of obs :',numObs) + + # Assume all the points actually fit into a square grid. Get the side of the square (use ceil to round up) + if numObs > 0: + l = np.ceil(np.sqrt(numObs)).astype('int') # Length of the side of the square + + # Make an array that can be reshaped into the square + raw_data1D = np.full(l*l,np.nan) # Initialize 1D array of length l**2 to np.nan + raw_data1D[0:numObs] = this_var[:] # Fill data to the extent possible. 
There will be some np.nan values at the end + raw_data = np.reshape(raw_data1D,(l,l)) # Reshape into "square grid" + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + met_data=raw_data.astype(float) # Give MET this info + + # Now need to tell MET the "grid" for the data + # Make a fake lat/lon grid going from 0.0 to 50.0 degrees, with the interval determined by number of points + griddedDatasets[source]['latDef'][0] = 0.0 # starting point + griddedDatasets[source]['latDef'][1] = np.diff(np.linspace(0,50,l)).round(6)[0] # interval (degrees) + griddedDatasets[source]['latDef'][2] = int(l) # number of points + griddedDatasets[source]['lonDef'][0:3] = griddedDatasets[source]['latDef'] + + gridInfo = getGridInfo(source, griddedDatasets[source]['gridType']) # 'LatLon' gridType + return met_data, gridInfo + + else: + return -99999, -99999 + +########### +def getGridInfo(source,gridType): + + if gridType == 'LatLon': + latDef = griddedDatasets[source]['latDef'] + lonDef = griddedDatasets[source]['lonDef'] + gridInfo = { + 'type': gridType, + 'name': source, + 'lat_ll': latDef[0], #-90.000, + 'lon_ll': lonDef[0], #-180.000, + 'delta_lat': latDef[1], #0.5000, + 'delta_lon': lonDef[1], #0.625, + 'Nlat': latDef[2], #361, + 'Nlon': lonDef[2], #576, + } + elif gridType == 'Gaussian': + gridInfo = { + 'type': gridType, + 'name': source, + 'nx': griddedDatasets[source]['nx'], + 'ny': griddedDatasets[source]['ny'], + 'lon_zero': griddedDatasets[source]['lon_zero'], + } + + return gridInfo + +def getAttrArray(source,variable,initTime,validTime): + + init = dt.datetime.strptime(initTime,"%Y%m%d%H") + valid = dt.datetime.strptime(validTime,"%Y%m%d%H") + lead, rem = divmod((valid-init).total_seconds(), 3600) + + attrs = { + + 'valid': valid.strftime("%Y%m%d_%H%M%S"), + 'init': init.strftime("%Y%m%d_%H%M%S"), + 'lead': str(int(lead)), + 'accum': '000000', + + 'name': variable, #'MERRA2_Cloud_Percentage' + 'long_name': variable, #'Cloud Percentage Levels', + 'level': 'ALL', + 'units': verifVariables[variable]['units'], + + 'grid': getGridInfo(source,griddedDatasets[source]['gridType']) + } + + #print(attrs) + #print(griddedDatasets[source]) + + return attrs + +######## END FUNCTIONS ########## + + +#if __name__ == "__main__": +dataFile, dataSource, variable, i_date, v_date, flag = sys.argv[1].split(":") +met_data = getDataArray(dataFile,dataSource,variable,flag) +attrs = getAttrArray(dataSource,variable,i_date,v_date) +print(attrs) From 243d0782d2f27fa244ffb0f470c344ae89727729 Mon Sep 17 00:00:00 2001 From: j-opatz Date: Mon, 5 Jun 2023 14:53:47 -0600 Subject: [PATCH 4/6] final use case, corrected paths for output --- .github/parm/use_case_groups.json | 2 +- ...at_fcstGFS_obsERA5_lowAndTotalCloudFrac.py | 137 +++ ..._fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py | 2 +- ...cstGFS_obsSATCORPS_cloudTopPressAndTemp.py | 2 +- .../GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py | 10 +- ...fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py | 2 +- internal/tests/use_cases/all_use_cases.txt | 1 + ..._fcstGFS_obsERA5_lowAndTotalCloudFrac.conf | 204 ++++ .../read_input_data.py | 911 ++++++++++++++++++ 9 files changed, 1262 insertions(+), 9 deletions(-) create mode 100644 docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py create mode 100644 parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.conf create mode 100755 
parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac/read_input_data.py diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json index bc0c8b4ec9..643f7a568e 100644 --- a/.github/parm/use_case_groups.json +++ b/.github/parm/use_case_groups.json @@ -11,7 +11,7 @@ }, { "category": "air_quality_and_comp", - "index_list": "0-5", + "index_list": "0-6", "run": false }, { diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py new file mode 100644 index 0000000000..9c389e9566 --- /dev/null +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py @@ -0,0 +1,137 @@ +""" +GridStat: Cloud Fractions with Neighborhood and Probabilities +============================================================= + +model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.conf + +""" +############################################################################## +# Scientific Objective +# -------------------- +# +# This use case captures various statistical measures of two model comparisons +# for low and total cloud fractions with different neighborhood +# settings for internal model metrics and to aid in future model updates +# + +############################################################################## +# Datasets +# -------- +# +# | **Forecast:** Global Forecast System (GFS) +# | **Observations:** ECMWF Reanalysis, Version 5 (ERA5) +# | **Grid:** GPP 17km masking region +# +# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | This tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See 'Running METplus' section for more information. +# + +############################################################################## +# METplus Components +# ------------------ +# +# This use case utilizes Python Embedding, which is called using the PYTHON_NUMPY keyword +# in the observation input template settings. The same Python script can processes both forecast and +# observation datasets, but only the observation dataset is not +# set up for native ingest by MET. Two separate forecast fields are verified against two respective observation fields, +# with the Python script being passed the input file, the model name, the variable name being analyzed, +# the initialization and valid times, and a flag to indicate if the field passed is observation or forecast. +# This process is repeated with 3 instance names to GridStat, each with a different setting for regridding, +# neighborhood evaluation, thresholding, output line types, and output prefix names. + +############################################################################## +# METplus Workflow +# ---------------- +# +# GridStat is the only MET tool called in this example. +# It processes the following run time: +# +# | **Init:** 2022-07-03 12Z +# | **Forecast lead:** 36 hour +# | +# Because instance names are used, GridStat will run 3 times for this 1 initalization time. 
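+#
+# As a point of reference for the configuration shown below, the observation field
+# names call the Python script with a single colon-separated argument string
+# (illustrative placeholders in angle brackets; the real values are filled in by
+# METplus templates)::
+#
+#    read_input_data.py <obs file>:ERA5:totalCloudFrac:<init YYYYMMDDHH>:<valid YYYYMMDDHH>:2
+#
+# The script splits this string on ":" into the input file, data source, variable,
+# initialization time, valid time, and a flag; a trailing flag of 2 marks the input
+# as an observation dataset (a flag of 1 would mark a forecast).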
+ +############################################################################## +# METplus Configuration +# --------------------- +# +# METplus first loads the default configuration file found in parm/metplus_config, +# then it loads any configuration files passed to METplus via the command line: +# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.conf +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.conf + +############################################################################## +# MET Configuration +# ----------------- +# +# METplus sets environment variables based on user settings in the METplus configuration file. +# See :ref:`How METplus controls MET config file settings` for more details. +# +# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!** +# +# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to: +# :ref:`Overriding Unsupported MET config file settings` +# +# .. note:: See the :ref:`GridStat MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/met_config/GridStatConfig_wrapped + +############################################################################## +# Python Embedding +# ---------------- +# +# This use case utilizes 1 Python script to read and process the observation fields. +# parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac/read_input_data.py +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac/read_input_data.py + +############################################################################## +# Running METplus +# --------------- +# +# Pass the use case configuration file to the run_metplus.py script +# along with any user-specific system configuration files if desired:: +# +# run_metplus.py /path/to/METplus/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.conf /path/to/user_system.conf +# +# See :ref:`running-metplus` for more information. + +############################################################################## +# Expected Output +# --------------- +# +# A successful run will output the following both to the screen and to the logfile:: +# +# INFO: METplus has successfully finished running. +# +# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. 
+# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac +# (relative to **OUTPUT_BASE**) +# and will contain the following files: +# +# * grid_stat_GFS_to_ERA5_F36_CloudFracs_360000L_20220705_000000V_pairs.nc +# * grid_stat_GFS_to_ERA5_F36_CloudFracs_360000L_20220705_000000V.stat +# * grid_stat_GFS_to_ERA5_F36_CloudFracs_NBR_360000L_20220705_000000V_pairs.nc +# * grid_stat_GFS_to_ERA5_F36_CloudFracs_NBR_360000L_20220705_000000V.stat +# * grid_stat_GFS_to_ERA5_F36_CloudFracs_PROB_360000L_20220705_000000V_pairs.nc +# * grid_stat_GFS_to_ERA5_F36_CloudFracs_PRB_360000L_20220705_000000V.stat + + +############################################################################## +# Keywords +# -------- +# +# .. note:: +# +# * GridStatToolUseCase +# * NetCDFFileUseCase +# * AirQualityAndCompAppUseCase +# * PythonEmbeddingFileUseCase +# +# Navigate to the :ref:`quick-search` page to discover other similar use cases. +# +# sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.png' diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py index b11f018c1b..37228aaadc 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py @@ -110,7 +110,7 @@ # INFO: METplus has successfully finished running. # # Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. -# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac +# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac # (relative to **OUTPUT_BASE**) # and will contain the following files: # diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py index a7700e06f9..90c27878f9 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py @@ -109,7 +109,7 @@ # INFO: METplus has successfully finished running. # # Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. 
-# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac +# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp # (relative to **OUTPUT_BASE**) # and will contain the following files: # diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py index b2f481b0d7..58f94a519b 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py @@ -109,14 +109,14 @@ # INFO: METplus has successfully finished running. # # Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. -# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac +# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt # (relative to **OUTPUT_BASE**) # and will contain the following files: # -# * grid_stat_MPAS_to_ERA5_F36_CloudFracs_360000L_20200724_120000V_pairs.nc -# * grid_stat_MPAS_to_ERA5_F36_CloudFracs_360000L_20200724_120000V.stat -# * grid_stat_MPAS_to_ERA5_F36_CloudFracs_NBR_360000L_20200724_120000V_pairs.nc -# * grid_stat_MPAS_to_ERA5_F36_CloudFracs_NBR_360000L_20200724_120000V.stat +# * grid_stat_MPAS_to_ERA5_F36_CloudBaseHgt_360000L_20200724_120000V_pairs.nc +# * grid_stat_MPAS_to_ERA5_F36_CloudBaseHgt_360000L_20200724_120000V.stat +# * grid_stat_MPAS_to_ERA5_F36_CloudBaseHgt_NBR_360000L_20200724_120000V_pairs.nc +# * grid_stat_MPAS_to_ERA5_F36_CloudBaseHgt_NBR_360000L_20200724_120000V.stat ############################################################################## # Keywords diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py index 395d37c3f6..1c37c29e05 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py @@ -109,7 +109,7 @@ # INFO: METplus has successfully finished running. # # Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. 
-# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac +# Output for this use case will be found in model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac # (relative to **OUTPUT_BASE**) # and will contain the following files: # diff --git a/internal/tests/use_cases/all_use_cases.txt b/internal/tests/use_cases/all_use_cases.txt index 73e1838de5..bfd9f6c74d 100644 --- a/internal/tests/use_cases/all_use_cases.txt +++ b/internal/tests/use_cases/all_use_cases.txt @@ -69,6 +69,7 @@ Category: air_quality_and_comp 3::GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac::model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.conf:: pandac_env, py_embed 4::GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac::model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.conf:: pandac_env, py_embed 5::GridStat_fcstMPAS_obsERA5_cloudBaseHgt::model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.conf:: pandac_env, py_embed +6::GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac::model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.conf:: pandac_env, py_embed Category: climate diff --git a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.conf b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.conf new file mode 100644 index 0000000000..8ca3e6dc08 --- /dev/null +++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.conf @@ -0,0 +1,204 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.html + +# For additional information, please see the METplus Users Guide. 
+# https://metplus.readthedocs.io/en/latest/Users_Guide + +# ### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + +PROCESS_LIST = GridStat, GridStat(nbr), GridStat(prob) + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +LOOP_BY = INIT +INIT_TIME_FMT = %Y%m%d%H +INIT_BEG=2022070312 +INIT_END=2022070312 +INIT_INCREMENT = 12H + +LEAD_SEQ = 36 + +LOOP_ORDER = times + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +FCST_GRID_STAT_INPUT_DIR = {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac +FCST_GRID_STAT_INPUT_TEMPLATE = {init?fmt=%Y%m%d}_gfs.t12z.pgrb2.0p25.f0{LEAD_SEQ} + +OBS_GRID_STAT_INPUT_DIR = +OBS_GRID_STAT_INPUT_TEMPLATE = PYTHON_NUMPY + +GRID_STAT_CLIMO_MEAN_INPUT_DIR = +GRID_STAT_CLIMO_MEAN_INPUT_TEMPLATE = + +GRID_STAT_CLIMO_STDEV_INPUT_DIR = +GRID_STAT_CLIMO_STDEV_INPUT_TEMPLATE = + +GRID_STAT_OUTPUT_DIR = {OUTPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac +GRID_STAT_OUTPUT_TEMPLATE = + + +### +# Field Info +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#field-info +### + +MODEL = GFS +OBTYPE = ERA5 + +CONFIG_DIR = {PARM_BASE}/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac + +FCST_VAR1_NAME = TCDC +FCST_VAR1_LEVELS = R636 +FCST_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +FCST_VAR2_NAME = LCDC +FCST_VAR2_LEVELS = R630 +FCST_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + +OBS_VAR1_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac/ERA5_{valid?fmt=%Y%m%d}00_Cld.nc:{OBTYPE}:totalCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR1_LEVELS = +OBS_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +OBS_VAR2_NAME = {CONFIG_DIR}/read_input_data.py {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac/ERA5_{valid?fmt=%Y%m%d}00_Cld.nc:{OBTYPE}:lowCloudFrac:{init?fmt=%Y%m%d%H}:{valid?fmt=%Y%m%d%H}:2 +OBS_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 + + +### +# GridStat Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#gridstat +### + +#LOG_GRID_STAT_VERBOSITY = 2 + +GRID_STAT_CONFIG_FILE = {PARM_BASE}/met_config/GridStatConfig_wrapped + +GRID_STAT_REGRID_TO_GRID = FCST +GRID_STAT_REGRID_METHOD = BILIN +GRID_STAT_REGRID_WIDTH = 2 + +GRID_STAT_DESC = + +FCST_GRID_STAT_FILE_WINDOW_BEGIN = 0 +FCST_GRID_STAT_FILE_WINDOW_END = 0 +OBS_GRID_STAT_FILE_WINDOW_BEGIN = 0 +OBS_GRID_STAT_FILE_WINDOW_END = 0 + +GRID_STAT_NEIGHBORHOOD_WIDTH = 1 +GRID_STAT_NEIGHBORHOOD_SHAPE = SQUARE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >=0.5 + +GRID_STAT_ONCE_PER_FIELD = False + +FCST_IS_PROB = false + 
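+# Clarifying note: the probability-related settings here (FCST_IS_PROB, OBS_IS_PROB,
+# and the *_PROB_THRESH values below) relate to probabilistic verification; in this
+# use case the forecast is only treated as probabilistic in the [prob] instance near
+# the end of this file, where FCST_IS_PROB is set to TRUE.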
+FCST_GRID_STAT_PROB_THRESH = ==0.1 + +OBS_IS_PROB = false + +OBS_GRID_STAT_PROB_THRESH = ==0.1 + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudFracs + +GRID_STAT_OUTPUT_FLAG_FHO = STAT +GRID_STAT_OUTPUT_FLAG_CTC = STAT +GRID_STAT_OUTPUT_FLAG_CTS = STAT +GRID_STAT_OUTPUT_FLAG_CNT = STAT +GRID_STAT_OUTPUT_FLAG_SL1L2 = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = STAT + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_CLIMO = FALSE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_MASK_POLY = {INPUT_BASE}/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac/GPP_17km_60S_60N_mask.nc + +[nbr] + +FCST_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SFP20, >SFP30, >SFP40, >SFP50, >SFP60, >SFP70, >SFP80 +FCST_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SFP20, >SFP30, >SFP40, >SFP50, >SFP60, >SFP70, >SFP80 + + +OBS_VAR1_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SOP20, >SOP30, >SOP40, >SOP50, >SOP60, >SOP70, >SOP80 +OBS_VAR2_THRESH = gt0, lt10.0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0, >SOP20, >SOP30, >SOP40, >SOP50, >SOP60, >SOP70, >SOP80 + + +GRID_STAT_NEIGHBORHOOD_WIDTH = 3, 5, 7, 9 +GRID_STAT_NEIGHBORHOOD_SHAPE = CIRCLE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >0.0 + +GRID_STAT_OUTPUT_FLAG_FHO = NONE +GRID_STAT_OUTPUT_FLAG_CTC = NONE +GRID_STAT_OUTPUT_FLAG_CTS = NONE +GRID_STAT_OUTPUT_FLAG_CNT = NONE +GRID_STAT_OUTPUT_FLAG_SL1L2 = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTC = STAT +GRID_STAT_OUTPUT_FLAG_NBRCTS = STAT +GRID_STAT_OUTPUT_FLAG_NBRCNT = STAT +GRID_STAT_OUTPUT_FLAG_GRAD = NONE + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_NBRHD = TRUE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudFracs_NBR + +[prob] + +OBS_VAR1_THRESH = gt0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +OBS_VAR2_THRESH = gt0, ge10.0, ge20.0, ge30.0, ge40.0, ge50.0, ge60.0, ge70.0, ge80.0, ge90.0 +FCST_IS_PROB = TRUE + +FCST_VAR1_THRESH = >0.1, >0.2, >0.3, >0.4, >0.5, >0.6, >0.7, >0.8, >0.9, >1.0 +FCST_VAR2_THRESH = >0.1, >0.2, >0.3, >0.4, >0.5, >0.6, >0.7, >0.8, >0.9, >1.0 +GRID_STAT_NEIGHBORHOOD_WIDTH = 3, 5, 7, 9 +GRID_STAT_NEIGHBORHOOD_SHAPE = CIRCLE + +GRID_STAT_NEIGHBORHOOD_COV_THRESH = >0.0 + +GRID_STAT_OUTPUT_FLAG_FHO = NONE +GRID_STAT_OUTPUT_FLAG_CTC = NONE +GRID_STAT_OUTPUT_FLAG_CTS = NONE +GRID_STAT_OUTPUT_FLAG_CNT = NONE +GRID_STAT_OUTPUT_FLAG_SL1L2 = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTC = NONE +GRID_STAT_OUTPUT_FLAG_NBRCTS = NONE +GRID_STAT_OUTPUT_FLAG_NBRCNT = NONE +GRID_STAT_OUTPUT_FLAG_GRAD = NONE +GRID_STAT_OUTPUT_FLAG_PCT = STAT +GRID_STAT_OUTPUT_FLAG_PSTD = STAT +GRID_STAT_OUTPUT_FLAG_PJC = STAT +GRID_STAT_OUTPUT_FLAG_PRC = STAT + +GRID_STAT_NC_PAIRS_FLAG_LATLON = TRUE +GRID_STAT_NC_PAIRS_FLAG_RAW = TRUE +GRID_STAT_NC_PAIRS_FLAG_DIFF = TRUE +GRID_STAT_NC_PAIRS_FLAG_NBRHD = FALSE +GRID_STAT_NC_PAIRS_FLAG_GRADIENT = TRUE +GRID_STAT_NC_PAIRS_FLAG_APPLY_MASK = TRUE + +GRID_STAT_OUTPUT_PREFIX = {MODEL}_to_{OBTYPE}_F{lead?fmt=%H}_CloudFracs_PROB + diff --git 
a/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac/read_input_data.py b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac/read_input_data.py new file mode 100755 index 0000000000..aa436135a6 --- /dev/null +++ b/parm/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac/read_input_data.py @@ -0,0 +1,911 @@ +#this code was provided by Craig Schwartz +#and is largely unaltered from its original +#function. + +#from __future__ import print_function +import os +import sys +import numpy as np +import datetime as dt +from netCDF4 import Dataset # http://code.google.com/p/netcdf4-python/ +from scipy.interpolate import NearestNDInterpolator, LinearNDInterpolator +#### for Plotting +import matplotlib.cm as cm +import matplotlib.axes as maxes +import matplotlib.pyplot as plt +from mpl_toolkits.axes_grid1 import make_axes_locatable +#from mpl_toolkits.basemap import Basemap +import fnmatch +import pygrib +import pickle as pk +##### + +########################################### + +missing_values = -9999.0 # for MET + +# UPP top layer bounds (Pa) for cloud layers +PTOP_LOW_UPP = 64200. # low for > 64200 Pa +PTOP_MID_UPP = 35000. # mid between 35000-64200 Pa +PTOP_HIGH_UPP = 15000. # high between 15000-35000 Pa + +# Values for 4 x 4 contingency table +Na, Nb, Nc, Nd = 1, 2, 3, 4 +Ne, Nf, Ng, Nh = 5, 6, 7, 8 +Ni, Nj, Nk, Nl = 9, 10, 11, 12 +Nm, Nn, No, Np = 13, 14, 15, 16 + +# Notes: +# 1) Entry for 'point' is for point-to-point comparison and is all dummy data (except for gridType) that is overwritten by point2point +# 2) ERA5 on NCAR CISL RDA changed at some point. Old is ERA5_2017 (not used anymore), new is ERA5, which we'll use for 2020 data +griddedDatasets = { + 'MERRA2' : { 'gridType':'LatLon', 'latVar':'lat', 'latDef':[-90.0,0.50,361], 'lonVar':'lon', 'lonDef':[-180.0,0.625,576], 'flipY':True, 'ftype':'nc'}, + 'SATCORPS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[-180.0,0.3125,1152], 'flipY':False, 'ftype':'nc' }, + 'ERA5_2017': { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.7848769072,0.281016829130516,640], 'lonVar':'longitude', 'lonDef':[0.0,0.28125,1280], 'flipY':False, 'ftype':'nc' }, + 'ERA5' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'GFS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'GALWEM' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':True, 'ftype':'grib'}, + 'GALWEM17' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-89.921875,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'grib'}, + 'WWMCA' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'grib'}, + 'MPAS' : { 'gridType':'LatLon', 'latVar':'latitude','latDef':[-90.0,0.25,721], 'lonVar':'longitude', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc'}, + 'SAT_WWMCA_MEAN' : { 'gridType':'LatLon', 'latVar':'lat','latDef':[-90.0,0.25,721], 'lonVar':'lon', 'lonDef':[0.0,0.25,1440], 'flipY':False, 'ftype':'nc' }, + 'point' : { 'gridType':'LatLon', 
'latVar':'latitude','latDef':[-90.0,0.156250,1152], 'lonVar':'longitude', 'lonDef':[0.117187,0.234375,1536], 'flipY':False, 'ftype':'nc'}, +} + #TODO:Correct one, but MET can ingest a Gaussian grid only in Grib2 format (from Randy B.) + #'ERA5' : { 'gridType':'Gaussian', 'nx':1280, 'ny':640, 'lon_zero':0, 'latVar':'latitude', 'lonVar':'longitude', 'flipY':False, }, + +#GALWEM, both 17-km and 0.25-degree +lowCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':3, 'typeOfFirstFixedSurface':10, 'shortName':'lcc' } +midCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':4, 'typeOfFirstFixedSurface':10, 'shortName':'mcc' } +highCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':5, 'typeOfFirstFixedSurface':10, 'shortName':'hcc' } +totalCloudFrac_GALWEM = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':10, 'shortName':'tcc' } +cloudTopHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':12, 'typeOfFirstFixedSurface':3, 'shortName':'cdct' } +cloudBaseHeight_GALWEM = { 'parameterCategory':6, 'parameterNumber':11, 'typeOfFirstFixedSurface':2, 'shortName':'cdcb' } + +#GFS +lowCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':214, 'shortName':'tcc' } +midCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':224, 'shortName':'tcc' } +highCloudFrac_GFS = { 'parameterCategory':6, 'parameterNumber':1, 'typeOfFirstFixedSurface':234, 'shortName':'tcc' } + +#WWMCA +totalCloudFrac_WWMCA = { 'parameterName':71, 'typeOfLevel':'entireAtmosphere', 'level':0 } + +cloudTopHeightLev1_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':1 } +cloudTopHeightLev2_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':2 } +cloudTopHeightLev3_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':3 } +cloudTopHeightLev4_WWMCA = { 'parameterName':228, 'typeOfLevel':'hybrid', 'level':4 } +cloudTopHeight_WWMCA = [ cloudTopHeightLev1_WWMCA, cloudTopHeightLev2_WWMCA, cloudTopHeightLev3_WWMCA, cloudTopHeightLev4_WWMCA ] + +cloudBaseHeightLev1_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':1 } +cloudBaseHeightLev2_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':2 } +cloudBaseHeightLev3_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':3 } +cloudBaseHeightLev4_WWMCA = { 'parameterName':227, 'typeOfLevel':'hybrid', 'level':4 } +cloudBaseHeight_WWMCA = [ cloudBaseHeightLev1_WWMCA, cloudBaseHeightLev2_WWMCA, cloudBaseHeightLev3_WWMCA, cloudBaseHeightLev4_WWMCA ] + +verifVariablesModel = { + 'binaryCloud' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'totalCloudFrac' : {'GFS':[''], 'GALWEM17':[totalCloudFrac_GALWEM], 'GALWEM':[totalCloudFrac_GALWEM], 'MPAS':['cldfrac_tot_UM_rand']}, + 'lowCloudFrac' : {'GFS':[lowCloudFrac_GFS], 'GALWEM17':[lowCloudFrac_GALWEM], 'GALWEM':[lowCloudFrac_GALWEM], 'MPAS':['cldfrac_low_UM']}, + 'midCloudFrac' : {'GFS':[midCloudFrac_GFS], 'GALWEM17':[midCloudFrac_GALWEM], 'GALWEM':[midCloudFrac_GALWEM], 'MPAS':['cldfrac_mid_UM']}, + 'highCloudFrac' : {'GFS':[highCloudFrac_GFS], 'GALWEM17':[highCloudFrac_GALWEM], 'GALWEM':[highCloudFrac_GALWEM], 'MPAS':['cldfrac_high_UM']}, + 'cloudTopHeight' : {'GFS':[''] , 'GALWEM17':[cloudTopHeight_GALWEM], 'GALWEM':[cloudTopHeight_GALWEM], 'MPAS':['cldht_top_UM']}, + 'cloudBaseHeight' : {'GFS':[''] , 'GALWEM17':[cloudBaseHeight_GALWEM], 'GALWEM':[cloudBaseHeight_GALWEM], 'MPAS':['cldht_base_UM']}, 
+} + +cloudFracCatThresholds = '>0, <10.0, >=10.0, >=20.0, >=30.0, >=40.0, >=50.0, >=60.0, >=70.0, >=80.0, >=90.0' # MET format string +brightnessTempThresholds = '<280.0, <275.0, <273.15, <270.0, <265.0, <260.0, <255.0, <250.0, <245.0, <240.0, <235.0, <230.0, <225.0, <220.0, <215.0, <210.0, <=SFP1, <=SFP5, <=SFP10, <=SFP25, <=SFP50, >=SFP50, >=SFP75, >=SFP90, >=SFP95, >=SFP99' +verifVariables = { + 'binaryCloud' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['TCC'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'NA', 'thresholds':'>0.0', 'interpMethod':'nearest' }, + 'totalCloudFrac' : { 'MERRA2':['CLDTOT'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['tcc'], 'WWMCA':[totalCloudFrac_WWMCA], 'SAT_WWMCA_MEAN':['Mean_WWMCA_SATCORPS'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'lowCloudFrac' : { 'MERRA2':['CLDLOW'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['lcc'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'midCloudFrac' : { 'MERRA2':['CLDMID'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['MCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'highCloudFrac' : { 'MERRA2':['CLDHGH'], 'SATCORPS':['cloud_percentage_level'], 'ERA5':['HCC'], 'units':'%', 'thresholds':cloudFracCatThresholds, 'interpMethod':'bilin' }, + 'cloudTopTemp' : { 'MERRA2':['CLDTMP'], 'SATCORPS':['cloud_temperature_top_level'], 'ERA5':[''] , 'units':'K', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopPres' : { 'MERRA2':['CLDPRS'], 'SATCORPS':['cloud_pressure_top_level'], 'ERA5':[''] , 'units':'hPa', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'cloudTopHeight' : { 'MERRA2':[''] , 'SATCORPS':['cloud_height_top_level'], 'ERA5':[''] , 'WWMCA':cloudTopHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudBaseHeight': { 'MERRA2':[''] , 'SATCORPS':['cloud_height_base_level'], 'ERA5':['cbh'], 'WWMCA':cloudBaseHeight_WWMCA, 'units':'m', 'thresholds':'NA', 'interpMethod':'nearest'}, + 'cloudCeiling' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'m', 'thresholds':'NA', 'interpMethod':'bilin'}, + 'brightnessTemp' : { 'MERRA2':[''] , 'SATCORPS':[''], 'ERA5':[''] , 'units':'K', 'thresholds':brightnessTempThresholds, 'interpMethod':'bilin'}, +} + +# Combine the two dictionaries +# Only reason verifVariablesModel exists is just for space--verifVaribles gets too long if we keep adding more datasets +for key in verifVariablesModel.keys(): + x = verifVariablesModel[key] + for key1 in x.keys(): + verifVariables[key][key1] = x[key1] + +#f = '/glade/u/home/schwartz/cloud_verification/GFS_grib_0.25deg/2018112412/gfs.0p25.2018112412.f006.grib2' +#grbs = pygrib.open(f) +#idx = pygrib.index(f,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') +#model = 'GFS' +#variable = 'totCloudCover' +#x = verifVariablesModel[variable][model] # returns a list, whose ith element is a dictionary +# e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) +#idx(parameterCategory=x[0]['parameterCategory'],parameterNumber=x[0]['parameterNumber'],typeOfFirstFixedSurface=x[0]['typeOfFirstFixedSurface']) + +# to read in an environmental variable +#x = os.getenv('a') # probably type string no matter what + +########### + +def getThreshold(variable): + x = verifVariables[variable]['thresholds'] + print(x) # needed for python 3 to read variable into csh variable + return x + +def getInterpMethod(variable): + x = 
verifVariables[variable]['interpMethod'].upper() + print(x) # needed for python 3 to read variable into csh variable + return x + +def getTotalCloudFrac(source,data): + if source == 'SATCORPS': + # x = data[0][0,:,:,0] * 1.0E-2 # scaling + x = (data[0][0,:,:,1] + data[0][0,:,:,2] + data[0][0,:,:,3])*1.0E-2 # scaling + # y = data[0] + # x = np.sum( y[:,:,:,1:4],axis=3) + elif source == 'MERRA2': +# x = ( data[0][0,:,:]+data[1][0,:,:]+data[2][0,:,:] ) *100.0 # the ith element of data is a numpy array + x = data[0][0,:,:] * 100.0 # the ith element of data is a numpy array + print(x.min(), x.max()) + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + elif source == 'SAT_WWMCA_MEAN': + x = data[0][0,:,:] # already in % + else: + x = data[0] + + # This next line is WRONG. + # Missing should be set to missing + # Then, the non-missing values are 1s and 0s + #output = np.where(x > 0.0, x, 0.0) + #output = np.where(x < 0.0, -9999.0, x) # missing. currently used for SATCORPS + + x = np.where( x < 0.0 , 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + return x + +def getBinaryCloud(source,data): + y = getTotalCloudFrac(source,data) + # keep NaNs as is, but then set everything else to either 100% or 0% + x = np.where( np.isnan(y), y, np.where(y > 0.0, 100.0, 0.0) ) + return x + +def getLayerCloudFrac(source,data,layer): + if source == 'SATCORPS': + if layer.lower().strip() == 'low' : i = 1 + if layer.lower().strip() == 'mid' : i = 2 + if layer.lower().strip() == 'high' : i = 3 + x = data[0][0,:,:,i] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 100.0 + elif source == 'ERA5': + try: x = data[0][0,0,:,:] * 100.0 + except: x = data[0][0,:,:] * 100.0 + elif source == 'MPAS': + x = data[0][0,:,:] * 100.0 + else: + x = data[0] + + x = np.where( x < 0.0, 0.0, x) # Force negative values to zero + x = np.where( x > 100.0, 100.0, x) # Force values > 100% to 100% + + return x + +def getCloudTopTemp(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-2 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopPres(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E-1 # scaling + elif source == 'MERRA2': + x = data[0][0,:,:] * 1.0E-2 # scaling [Pa] -> [hPa] + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + else: + x = data[0] + return x + +def getCloudTopHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud top height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmax(tmp,axis=0) # get maximum cloud top height across all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud top shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def 
getCloudBaseHeight(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] * 1.0E+1 # scaling to [meters] + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] + except: x = data[0][0,:,:] + elif source == 'GALWEM17': + x = data[0] * 1000.0 * 0.3048 # kilofeet -> meters + elif source == 'MPAS': + x = data[0][0,:,:] # already in meters + elif source == 'WWMCA': + # data is a list (should be length 4) + if len(data) != 4: + print('error with WWMCA Cloud base height') + sys.exit() + tmp = np.array(data) # already in meters + tmp = np.where( tmp <= 0, np.nan, tmp) # replace 0 or negative values with NAN + x = np.nanmin(tmp,axis=0) # get lowest cloud base over all layers + else: + x = data[0] + + # Eliminate unphysical values (assume cloud base shouldn't be > 50000 meters) + y = np.where( x > 50000.0 , np.nan, x ) + + return y + +def getCloudCeiling(source,data): + if source == 'SATCORPS': + x = data[0][0,:,:,0] #TBD + elif source == 'MERRA2': + x = data[0][0,:,:] #TBD + elif source == 'ERA5': + try: x = data[0][0,0,:,:] # TBD + except: x = data[0][0,:,:] + return x + +# add other functions for different variables + +########### + +def getDataArray(inputFile,source,variable,dataSource): + # 1) inputFile: File name--either observations or forecast + # 2) source: Obsevation source (e.g., MERRA, SATCORP, etc.) + # 3) variable: Variable to verify + # 4) dataSource: If 1, process forecast file. If 2 process obs file. + +# # specifying names here temporarily. file names should be passed in to python from shell script +# if source == 'merra': nc_file = '/gpfs/fs1/scratch/schwartz/MERRA/MERRA2_400.tavg1_2d_rad_Nx.20181101.nc4' +# elif source == 'satcorp': nc_file = '/glade/scratch/bjung/met/test_satcorps/GEO-MRGD.2018334.0000.GRID.NC' +# elif source == 'era5': nc_file = '/glade/scratch/bjung/met/test_era5/e5.oper.fc.sfc.instan.128_164_tcc.regn320sc.2018111606_2018120112.nc' + + source = source.upper().strip() # Force uppercase and get rid of blank spaces, for safety + + print('dataSource = ',dataSource) + + ftype = griddedDatasets[source]['ftype'].lower().strip() + + # Get file handle + if ftype == 'nc': + nc_fid = Dataset(inputFile, "r", format="NETCDF4") + #nc_fid.set_auto_scale(True) + elif ftype == 'grib': + if source == 'WWMCA': + idx = pygrib.index(inputFile,'parameterName','typeOfLevel','level') + else: + idx = pygrib.index(inputFile,'parameterCategory','parameterNumber','typeOfFirstFixedSurface') + + # dataSource == 1 means forecast, 2 means obs +# if dataSource == 1: varsToRead = verifVariablesModel[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list +# if dataSource == 2: varsToRead = verifVariables[variable][source] # returns a list + varsToRead = verifVariables[variable][source] # if ftype == 'grib', returns a list whose ith element is a dictionary. otherwise, just a list + + print('Trying to read ',inputFile) + + # Get lat/lon information--currently not used + #latVar = griddedDatasets[source]['latVar'] + #lonVar = griddedDatasets[source]['lonVar'] + #lats = np.array(nc_fid.variables[latVar][:]) # extract/copy the data + #lons = np.array(nc_fid.variables[lonVar][:] ) + + #print(lats.max()) + #print(lons.max()) + + # one way to deal with scale factors + # probably using something like nc_fid.set_auto_scale(True) is better... 
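+    # Commented sketch only (not executed) of the alternative mentioned above, using the
+    # netCDF4-python API: with automatic scaling enabled on the Dataset, scale_factor and
+    # add_offset are applied as each variable is read, so no manual rescaling loop is needed.
+    #
+    #   nc_fid.set_auto_scale(True)   # or nc_fid.set_auto_maskandscale(True)
+    #   lats = np.array(nc_fid.variables[griddedDatasets[source]['latVar']][:])
+    #   lons = np.array(nc_fid.variables[griddedDatasets[source]['lonVar']][:])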
+ #latMax = lats.max() + #while latMax > 90.0: + # lons = lons * 0.1 + # lats = lats * 0.1 + # latMax = lats.max() + + # get data + data = [] + for v in varsToRead: + if ftype == 'grib': + if source == 'WWMCA': + x = idx(parameterName=v['parameterName'],typeOfLevel=v['typeOfLevel'],level=v['level'])[0] # by getting element 0, you get a pygrib message + else: + # e.g., idx(parameterCategory=6,parameterNumber=1,typeOfFirstFixedSurface=234) + if ( variable == 'cloudTopHeight' or variable == 'cloudBaseHeight') and source == 'GALWEM17': + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[1] # by getting element 1, you get a pygrib message + else: + x = idx(parameterCategory=v['parameterCategory'],parameterNumber=v['parameterNumber'],typeOfFirstFixedSurface=v['typeOfFirstFixedSurface'])[0] # by getting element 0, you get a pygrib message + if x.shortName != v['shortName']: print('Name mismatch!') + #ADDED BY JOHN O + print(x) + print('Reading ', x.shortName, 'at level ', x.typeOfFirstFixedSurface) + read_var = x.values # same x.data()[0] + read_missing = x.missingValue + print('missing value = ',read_missing) + + # The missing value (read_missing) for GALWEM17 and GALWEM cloud base/height is 9999, which is not the best choice because + # those could be actual values. So we need to use the masked array part (below) to handle which + # values are missing. We also set read_missing to something unphysical to essentially disable it. + # Finally, if we don't change the 'missingValue' property in the GRIB2 file we are eventually outputting, + # the bitmap will get all messed up, because it will be based on 9999 instead of $missing_values + if variable == 'cloudTopHeight' or variable == 'cloudBaseHeight': + read_missing = -9999. + x['missingValue'] = read_missing + if source == 'GALWEM17': + #These are masked numpy arrays, with mask = True where there is a missing value (no cloud) + #Use np.ma.filled to create an ndarray where mask = True values are set to np.nan + read_var = np.ma.filled(read_var.astype(read_var.dtype), np.nan) + elif ftype == 'nc': + read_var = nc_fid.variables[v] # extract/copy the data + try: + read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + except: + read_missing = -9999. # set a default missing value. probably only need to do this for MPAS + + print('Reading ', v) + + this_var = np.array( read_var ) # to numpy array + #print(read_missing, np.nan) + this_var = np.where( this_var==read_missing, np.nan, this_var ) + #print(this_var.shape) + data.append(this_var) # ith element of the list is a NUMPY ARRAY for the ith variable + #print(type(this_var)) + #print(type(data)) + + # Call a function to get the variable of interest. 
+ # Add a new function for each variable + if variable == 'binaryCloud': raw_data = getBinaryCloud(source,data) + if variable == 'totalCloudFrac': raw_data = getTotalCloudFrac(source,data) + if variable == 'lowCloudFrac': raw_data = getLayerCloudFrac(source,data,'low') + if variable == 'midCloudFrac': raw_data = getLayerCloudFrac(source,data,'mid') + if variable == 'highCloudFrac': raw_data = getLayerCloudFrac(source,data,'high') + if variable == 'cloudTopTemp': raw_data = getCloudTopTemp(source,data) + if variable == 'cloudTopPres': raw_data = getCloudTopPres(source,data) + if variable == 'cloudTopHeight': raw_data = getCloudTopHeight(source,data) + if variable == 'cloudBaseHeight': raw_data = getCloudBaseHeight(source,data) + if variable == 'cloudCeiling': raw_data = getCloudCeiling(source,data) + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + # Array met_data is passed to MET + # Graphics should plot $met_data to make sure things look correct + if griddedDatasets[source]['flipY']: + print('flipping ',source,' data about y-axis') + met_data=np.flip(raw_data,axis=0).astype(float) + else: + met_data=raw_data.astype(float) + + # Make plotting optional or Just use plot_data_plane +# plt_data=np.where(met_data<0, np.nan, met_data) +# map=Basemap(projection='cyl',llcrnrlat=-90,urcrnrlat=90,llcrnrlon=-180,urcrnrlon=180,resolution='c') +# map.drawcoastlines() +# map.drawcountries() +# map.drawparallels(np.arange(-90,90,30),labels=[1,1,0,1]) +# map.drawmeridians(np.arange(0,360,60),labels=[1,1,0,1]) +# plt.contourf(lons,lats,plt_data,20,origin='upper',cmap=cm.Greens) #cm.gist_rainbow) +# title=source+"_"+variable+"_"+str(validTime) +# plt.title(title) +# plt.colorbar(orientation='horizontal') +# plt.savefig(title+".png") + + # If a forecast file, output a GRIB file with + # 1 record containing the met_data + # This is a hack, because right now, MET python embedding doesn't work with pygrib, + # so output the data to a temporary file, and then have MET read the temporary grib file. + # Starting with version 9.0 of MET, the hack isn't needed, and MET python embedding works with pygrib + outputFcstFile = False # MUST be True for MET version < 9.0. 
For MET 9.0+, optional + if dataSource == 1 and ftype == 'grib': + if outputFcstFile: + grbtmp = x + grbtmp['values']=met_data + grbout = open('temp_fcst.grb2','ab') + grbout.write(grbtmp.tostring()) + grbout.close() # Close the outfile GRIB file + print('Successfully output temp_fcst.grb2') + + # Close files + if ftype == 'grib': idx.close() # Close the input GRIB file + if ftype == 'nc': nc_fid.close() # Close the netCDF file + + return met_data + +def obsError(fcstData,obsErrorFile,validDate,dataSource): + + print('Adding noise to the cloud fraction fields') + print('Using obsErrorFile',obsErrorFile) + + # First load the obsError information + #obsErrorFile = 'ob_errors.pk' + infile = open(obsErrorFile,'rb') + binEdges, binStddev = pk.load(infile) # 'numpy.ndarray' types + infile.close() + + # Get 1d forecast data + shape = fcstData.shape + fcst = fcstData.flatten() + + # Set random number seed based on valid time and model + if dataSource.upper().strip() == 'MPAS': ii = 10 + elif dataSource.upper().strip() == 'GALWEM': ii = 20 + elif dataSource.upper().strip() == 'GFS': ii = 30 + np.random.seed(int(validDate*.1 + ii)) + + # Find which bin the data is in + for i in range(0,len(binEdges)-1): + idx = np.where( (fcst >= binEdges[i]) & (fcst < binEdges[i+1]) )[0] + n = len(idx) # number of points in the ith bin + if n > 0: # check for empty bins + randVals = np.random.normal(0,binStddev[i],n) + fcst[idx] = fcst[idx] + randVals + + # bound forecast values to between 0 and 100% + fcst = np.where( fcst < 0.0, 0.0, fcst) + fcst = np.where( fcst > 100.0, 100.0, fcst) + + # now reshape forecast data back to 2D + output = fcst.reshape(shape) + + # data will have NaNs where bad. + return output + +def getFcstCloudFrac(cfr,pmid,psfc,layerDefinitions): # cfr is cloud fraction(%), pmid is 3D pressure(Pa), psfc is surface pressure (Pa) code from UPP ./INITPOST.F + + if pmid.shape != cfr.shape: # sanity check + print('dimension mismatch bewteen cldfra and pressure') + sys.exit() + + nlocs, nlevs = pmid.shape + + if len(psfc) != nlocs: # another sanity check + print('dimension mismatch bewteen cldfra and surface pressure') + sys.exit() + + cfracl = np.zeros(nlocs) + cfracm = np.zeros(nlocs) + cfrach = np.zeros(nlocs) + + for i in range(0,nlocs): + + PTOP_HIGH = PTOP_HIGH_UPP + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc[i] + PTOP_MID = 0.45*psfc[i] + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP + PTOP_MID = PTOP_MID_UPP + + idxLow = np.where( pmid[i,:] >= PTOP_LOW)[0] # using np.where with just 1 argument returns tuple + idxMid = np.where( (pmid[i,:] < PTOP_LOW) & (pmid[i,:] >= PTOP_MID))[0] + idxHigh = np.where( (pmid[i,:] < PTOP_MID) & (pmid[i,:] >= PTOP_HIGH))[0] + + # use conditions in case all indices are missing + if (len(idxLow) >0 ): cfracl[i] = np.max( cfr[i,idxLow] ) + if (len(idxMid) >0 ): cfracm[i] = np.max( cfr[i,idxMid] ) + if (len(idxHigh) >0 ): cfrach[i] = np.max( cfr[i,idxHigh] ) + + tmp = np.vstack( (cfracl,cfracm,cfrach)) # stack the rows into one 2d array + cldfraMax = np.max(tmp,axis=0) # get maximum value across low/mid/high for each pixel (minimum overlap assumption) + + # This is the fortran code put into python format...double loop unnecessary and slow + #for i in range(0,nlocs): + # for k in range(0,nlevs): + # if pmid(i,k) >= PTOP_LOW: + # cfracl(i) = np.max( [cfracl(i),cfr(i,k)] ) # Low + # elif pmid(i,k) < PTOP_LOW and pmid(i,k) >= PTOP_MID: + # cfracm(i) = np.max( [cfracm(i),cfr(i,k)] ) # Mid + # elif pmid(i,k) < PTOP_MID 
and pmid(i,k) >= PTOP_HIGH: # High + # cfrach(i) = np.max( [cfrach(i),cfr(i,k)] ) + + return cfracl, cfracm, cfrach, cldfraMax + +def getGOES16LatLon(g16_data_file): + + # Start timer + startTime = dt.datetime.utcnow() + + # designate dataset + g16nc = Dataset(g16_data_file, 'r') + + # GOES-R projection info and retrieving relevant constants + proj_info = g16nc.variables['goes_imager_projection'] + lon_origin = proj_info.longitude_of_projection_origin + H = proj_info.perspective_point_height+proj_info.semi_major_axis + r_eq = proj_info.semi_major_axis + r_pol = proj_info.semi_minor_axis + + # Data info + lat_rad_1d = g16nc.variables['x'][:] + lon_rad_1d = g16nc.variables['y'][:] + + # close file when finished + g16nc.close() + g16nc = None + + # create meshgrid filled with radian angles + lat_rad,lon_rad = np.meshgrid(lat_rad_1d,lon_rad_1d) + + # lat/lon calc routine from satellite radian angle vectors + + lambda_0 = (lon_origin*np.pi)/180.0 + + a_var = np.power(np.sin(lat_rad),2.0) + (np.power(np.cos(lat_rad),2.0)*(np.power(np.cos(lon_rad),2.0)+(((r_eq*r_eq)/(r_pol*r_pol))*np.power(np.sin(lon_rad),2.0)))) + b_var = -2.0*H*np.cos(lat_rad)*np.cos(lon_rad) + c_var = (H**2.0)-(r_eq**2.0) + + r_s = (-1.0*b_var - np.sqrt((b_var**2)-(4.0*a_var*c_var)))/(2.0*a_var) + + s_x = r_s*np.cos(lat_rad)*np.cos(lon_rad) + s_y = - r_s*np.sin(lat_rad) + s_z = r_s*np.cos(lat_rad)*np.sin(lon_rad) + + lat = (180.0/np.pi)*(np.arctan(((r_eq*r_eq)/(r_pol*r_pol))*((s_z/np.sqrt(((H-s_x)*(H-s_x))+(s_y*s_y)))))) + lon = (lambda_0 - np.arctan(s_y/(H-s_x)))*(180.0/np.pi) + + # End timer + endTime = dt.datetime.utcnow() + time = (endTime - startTime).microseconds / (1000.0*1000.0) + print('took %f4.1 seconds to get GOES16 lat/lon'%(time)) + + return lon,lat # lat/lon are 2-d arrays + +# -- +def getGOESRetrivalData(goesFile,goesVar): + + if not os.path.exists(goesFile): + print(goesFile+' not there. exit') + sys.exit() + + # First get GOES lat/lon + goesLon2d, goesLat2d = getGOES16LatLon(goesFile) # 2-d arrays + goesLon = goesLon2d.flatten() # 1-d arrays + goesLat = goesLat2d.flatten() + + # Now open the file and get the data we want + nc_goes = Dataset(goesFile, "r", format="NETCDF4") + + # If the next line is true (it should be), this indicates the variable needs to be treated + # as an "unsigned 16-bit integer". This is a pain. So we must use the "astype" method + # to change the variable type BEFORE applying scale_factor and add_offset. 
After the conversion + # we then can manually apply the scale factor and offset + #goesVar = 'PRES' + goesVar = goesVar.strip() # for safety + if nc_goes.variables[goesVar]._Unsigned.lower().strip() == 'true': + nc_goes.set_auto_scale(False) # Don't automatically apply scale_factor and add_offset to variable + goesData2d = np.array( nc_goes.variables[goesVar]).astype(np.uint16) + goesData2d = goesData2d * nc_goes.variables[goesVar].scale_factor + nc_goes.variables[goesVar].add_offset + goesQC2d = np.array( nc_goes.variables['DQF']).astype(np.uint8) + else: + goesData2d = np.array( nc_goes.variables[goesVar]) + goesQC2d = np.array( nc_goes.variables['DQF']) + + # Make variables 1-d + goesQC = goesQC2d.flatten() + goesData = goesData2d.flatten() + nc_goes.close() + + # Get rid of NaNs; base it on longitude + goesData = goesData[~np.isnan(goesLon)] # Handle data arrays first before changing lat/lon itself + goesQC = goesQC[~np.isnan(goesLon)] + goesLon = goesLon[~np.isnan(goesLon)] # ~ is "logical not", also np.logical_not + goesLat = goesLat[~np.isnan(goesLat)] + if goesLon.shape != goesLat.shape: + print('GOES lat/lon shape mismatch') + sys.exit() + + # If goesQC == 0, good QC and there was a cloud with a valid pressure. + # If goesQC == 4, no cloud; probably clear sky. + # All other QC means no data, and we want to remove those points + idx = np.logical_or( goesQC == 0, goesQC == 4) # Only keep QC == 0 or 4 + goesData = goesData[idx] + goesQC = goesQC[idx] + goesLon = goesLon[idx] + goesLat = goesLat[idx] + + # Only QC with 0 or 4 are left; now set QC == 4 to missing to indicate clear sky + goesData = np.where( goesQC != 0, missing_values, goesData) + + # Get longitude to between (0,360) for consistency with JEDI files (this check is applied to JEDI files, too) + goesLon = np.where( goesLon < 0, goesLon + 360.0, goesLon ) + + print('Min GOES Lon = ',np.min(goesLon)) + print('Max GOES Lon = ',np.max(goesLon)) + + return goesLon, goesLat, goesData + +def point2point(source,inputDir,satellite,channel,goesFile,condition,layerDefinitions,dataSource): + + # Static Variables for QC and obs + qcVar = 'brightness_temperature_'+str(channel)+'@EffectiveQC' #'@EffectiveQC0' # QC variable + obsVar = 'brightness_temperature_'+str(channel)+'@ObsValue' # Observation variable + + # Get GOES-16 retrieval file with auxiliary information + if 'abi' in satellite or 'ahi' in satellite: + goesLon, goesLat, goesData = getGOESRetrivalData(goesFile,'PRES') # return 1-d arrays + lonlatGOES = np.array( list(zip(goesLon, goesLat))) # lon/lat pairs for each GOES ob (nobs_GOES, 2) + #print('shape lonlatGOES = ',lonlatGOES.shape) + print('getting data from ',goesFile) + myGOESInterpolator = NearestNDInterpolator(lonlatGOES,goesData) + + # First check to see if there's a concatenated file with all obs. + # If so, use that. If not, have to process one file per processor, which takes a lot more time + if os.path.exists(inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'): + inputFiles = [inputDir+'/obsout_omb_'+satellite+'_ALL.nc4'] # needs to be in a list since we loop over inputFiles + else: + # Get list of OMB files to process. There is one file per processor. + # Need to get them in order so they are called in the same order for the + # forecast and observed passes through this subroutine. 
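+        # As a sketch of what the pattern below is expected to match (file names here are
+        # illustrative only, assuming something like satellite='abi_g16'),
+        # 'obsout*_abi_g16*nc4' would pick up per-processor files such as
+        #     obsout_omb_abi_g16_0000.nc4, obsout_omb_abi_g16_0001.nc4, ...
+        # and sort() then orders them identically for the forecast and observed passes.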
+ files = os.listdir(inputDir) + inputFiles = fnmatch.filter(files,'obsout*_'+satellite+'*nc4') # returns relative path names + inputFiles = [inputDir+'/'+s for s in inputFiles] # add on directory name + inputFiles.sort() # Get in order from low to high + if len(inputFiles) == 0: return -99999, -99999 # if no matching files, force a failure + + # Variable to pull for brightness temperature +# if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@GsiHofXBc' # Forecast variable + if dataSource == 1: v = 'brightness_temperature_'+str(channel)+'@hofx' #'@depbg' # OMB + if dataSource == 2: v = obsVar + + # Read the files and put data in array + allData, allDataQC = [], [] + for inputFile in inputFiles: + nc_fid = Dataset(inputFile, "r", format="NETCDF4") #Dataset is the class behavior to open the file + print('Trying to read ',v,' from ',inputFile) + + # Read forecast/obs data + read_var = nc_fid.variables[v] # extract/copy the data + # read_missing = read_var.missing_value # get variable attributes. Each dataset has own missing values. + this_var = np.array( read_var ) # to numpy array + # this_var = np.where( this_var==read_missing, np.nan, this_var ) + + #if dataSource == 1: # If true, we just read in OMB data, but we want B + # obsData = np.array( nc_fid.variables[obsVar]) + # this_var = obsData - this_var # get background/forecast value (O - OMB = B) + + #Read QC data + qcData = np.array(nc_fid.variables[qcVar]) + + # Sanity check...shapes should match + if qcData.shape != this_var.shape: return -99999, -99999 + + if 'abi' in satellite or 'ahi' in satellite: + + # Get the GOES-16 retrieval data at the observation locations in this file + # GOES values < 0 mean clear sky + lats = np.array(nc_fid.variables['latitude@MetaData']) + lons = np.array(nc_fid.variables['longitude@MetaData']) + + # Get longitude to between (0,360) for consistency with GOES-16 files + lons = np.where( lons < 0, lons + 360.0, lons ) + + lonlat = np.array( list(zip(lons,lats))) # lon/lat pairs for each ob (nobs, 2) + thisGOESData = myGOESInterpolator(lonlat) # GOES data at obs locations in this file. If pressure, units are hPa + thisGOESData = thisGOESData * 100.0 # get into Pa + + #obsCldfra = np.array( nc_fid.variables['cloud_area_fraction@MetaData'] )*100.0 # Get into %...observed cloud fraction (AHI/ABI only) + + geoValsFile = inputFile.replace('obsout','geoval') + if not os.path.exists(geoValsFile): + print(geoValsFile+' not there. exit') + sys.exit() + + nc_fid2 = Dataset(geoValsFile, "r", format="NETCDF4") + fcstCldfra = np.array( nc_fid2.variables['cloud_area_fraction_in_atmosphere_layer'])*100.0 # Get into % + pressure = np.array( nc_fid2.variables['air_pressure']) # Pa + pressure_edges = np.array( nc_fid2.variables['air_pressure_levels']) # Pa + psfc = pressure_edges[:,-1] # Surface pressure (Pa)...array order is top down + if layerDefinitions.upper().strip() == 'ERA5': + PTOP_LOW = 0.8*psfc # these are arrays + PTOP_MID = 0.45*psfc + PTOP_HIGH = PTOP_HIGH_UPP * np.ones_like(psfc) + elif layerDefinitions.upper().strip() == 'UPP': + PTOP_LOW = PTOP_LOW_UPP # these are constants + PTOP_MID = PTOP_MID_UPP + PTOP_HIGH = PTOP_HIGH_UPP + else: + print('layerDefinitions = ',layerDefinitions,'is invalid. exit') + sys.exit() + fcstLow,fcstMid,fcstHigh,fcstTotCldFra = getFcstCloudFrac(fcstCldfra,pressure,psfc,layerDefinitions) # get low/mid/high/total forecast cloud fractions for each ob + nc_fid2.close() + + # Modify QC data based on correspondence between forecast and obs. 
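+            # As a minimal sketch of the np.where masking pattern used throughout this block
+            # (made-up numbers, not data from this use case):
+            #     np.where(np.array([10., 90.]) >= 20.0, np.array([0, 0]), -9999)  ->  array([-9999, 0])
+            # i.e. points that fail the condition have their QC value replaced with missing_values,
+            # so they are dropped later when only points with QC == 0 are kept.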
+            # qcData is used to select the good data later.
+            # It's possible that there are multiple forecast layers, such that fcstLow,fcstMid,fcstHigh are all > cldfraThresh
+            # However, GOES-16 CTP doesn't really account for layering. So, we need to remove layered clouds from the forecast,
+            # focusing only on the layers that we asked for when doing {low,mid,high}Only conditions
+            # The "|" symbol is "np.logical_or"
+            yes = 2.0
+            no = 0.0
+            cldfraThresh = 20.0 # percent
+            if qcData.shape == fcstTotCldFra.shape == thisGOESData.shape: # these should all match
+                print('Using condition ',condition,'for ABI/AHI')
+
+                # Note that "&" is "np.logical_and" for boolean (true/false) quantities.
+                # Thus, each condition should be enclosed in parentheses
+                if condition.lower().strip() == 'clearOnly'.lower(): # clear in both forecast and obs
+                    qcData = np.where( (fcstTotCldFra < cldfraThresh) & (thisGOESData <= 0.0), qcData, missing_values)
+                elif condition.lower().strip() == 'cloudyOnly'.lower(): # cloudy in both forecast and obs
+                    qcData = np.where( (fcstTotCldFra >= cldfraThresh) & (thisGOESData > 0.0), qcData, missing_values)
+                elif condition.lower().strip() == 'lowOnly'.lower(): # low clouds in both forecast and obs
+                    fcstLow = np.where( (fcstMid >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstLow) # remove mid, high
+                    qcData = np.where( (fcstLow >= cldfraThresh) & ( thisGOESData >= PTOP_LOW), qcData, missing_values)
+                elif condition.lower().strip() == 'midOnly'.lower(): # mid clouds in both forecast and obs
+                    fcstMid = np.where( (fcstLow >= cldfraThresh) | ( fcstHigh >= cldfraThresh), missing_values, fcstMid) # remove low, high
+                    qcData = np.where( (fcstMid >= cldfraThresh) & (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), qcData, missing_values)
+                elif condition.lower().strip() == 'highOnly'.lower(): # high clouds in both forecast and obs
+                    fcstHigh = np.where( (fcstLow >= cldfraThresh) | ( fcstMid >= cldfraThresh), missing_values, fcstHigh) # remove low, mid
+                    qcData = np.where( (fcstHigh >= cldfraThresh) & (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), qcData, missing_values)
+                elif condition.lower().strip() == 'fcstLow'.lower(): # low clouds in forecast (layers possible); obs could be anything
+                    qcData = np.where( fcstLow >= cldfraThresh , qcData, missing_values)
+                elif condition.lower().strip() == 'fcstMid'.lower(): # mid clouds in forecast (layers possible); obs could be anything
+                    qcData = np.where( fcstMid >= cldfraThresh , qcData, missing_values)
+                elif condition.lower().strip() == 'fcstHigh'.lower(): # high clouds in forecast (layers possible); obs could be anything
+                    qcData = np.where( fcstHigh >= cldfraThresh , qcData, missing_values)
+                elif condition.lower().strip() == 'cloudEventLow'.lower():
+                    if dataSource == 1: this_var = np.where( fcstLow >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET
+                    if dataSource == 2: this_var = np.where( thisGOESData >= PTOP_LOW, yes, no )
+                elif condition.lower().strip() == 'cloudEventMid'.lower():
+                    if dataSource == 1: this_var = np.where( fcstMid >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET
+                    if dataSource == 2: this_var = np.where( (thisGOESData < PTOP_LOW) & (thisGOESData >= PTOP_MID), yes, no )
+                elif condition.lower().strip() == 'cloudEventHigh'.lower():
+                    if dataSource == 1: this_var = np.where( fcstHigh >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET
+                    if dataSource == 2: this_var = np.where( (thisGOESData < PTOP_MID) & (thisGOESData >= PTOP_HIGH), yes, no )
+                elif condition.lower().strip() == 'cloudEventTot'.lower():
+                    if dataSource == 1: this_var = np.where( fcstTotCldFra >= cldfraThresh, yes, no ) # set cloudy points to 2, clear points to 0, use threshold of 1 in MET
+                    if dataSource == 2: this_var = np.where( thisGOESData > 0.0, yes, no )
+                elif condition.lower().strip() == 'all':
+                    print("not doing any conditional verification or stratifying by event")
+                else:
+                    print("condition = ",condition," not recognized.")
+                    sys.exit()
+                #elif condition.lower().strip() == '4x4table'.lower():
+                    #if dataSource == 1:
+                    #    this_var = np.where( fcstLow >= cldfraThresh, yesLow, no )
+                    #    this_var = this_var + np.where( fcstMid >= cldfraThresh, yesMid, no )
+                    #    this_var = this_var + np.where( fcstHigh >= cldfraThresh, yesHigh, no )
+                print('number removed = ', (qcData==missing_values).sum())
+                #print('number passed = ', qcData.shape[0] - (qcData==missing_values).sum())
+            else:
+                print('shape mismatch')
+                return -99999, -99999
+
+        # Append to arrays
+        allData.append(this_var)
+        allDataQC.append(qcData)
+
+        nc_fid.close() # done with the file, so close it before going to next file in loop
+
+    # We're now all done looping over the individual files
+
+    # Get the indices with acceptable QC
+    allQC = np.concatenate(allDataQC) # Put list of numpy arrays into a single long 1-D numpy array. All QC data.
+    idx = np.where(allQC==0) # returns indices
+
+    # Now get all the forecast/observed brightness temperature data with acceptable QC
+    this_var = np.concatenate(allData)[idx] # Put list of numpy arrays into a single long 1-D numpy array. This is all the forecast/obs data with good QC
+    numObs = this_var.shape[0] # number of points with good QC for this channel
+    print('Number of obs :',numObs)
+
+    # Assume all the points actually fit into a square grid. Get the side of the square (use ceil to round up)
+    if numObs > 0:
+        l = np.ceil(np.sqrt(numObs)).astype('int') # Length of the side of the square
+
+        # Make an array that can be reshaped into the square
+        raw_data1D = np.full(l*l,np.nan) # Initialize 1D array of length l**2 to np.nan
+        raw_data1D[0:numObs] = this_var[:] # Fill data to the extent possible.
There will be some np.nan values at the end + raw_data = np.reshape(raw_data1D,(l,l)) # Reshape into "square grid" + + raw_data = np.where(np.isnan(raw_data), missing_values, raw_data) # replace np.nan to missing_values (for MET) + + met_data=raw_data.astype(float) # Give MET this info + + # Now need to tell MET the "grid" for the data + # Make a fake lat/lon grid going from 0.0 to 50.0 degrees, with the interval determined by number of points + griddedDatasets[source]['latDef'][0] = 0.0 # starting point + griddedDatasets[source]['latDef'][1] = np.diff(np.linspace(0,50,l)).round(6)[0] # interval (degrees) + griddedDatasets[source]['latDef'][2] = int(l) # number of points + griddedDatasets[source]['lonDef'][0:3] = griddedDatasets[source]['latDef'] + + gridInfo = getGridInfo(source, griddedDatasets[source]['gridType']) # 'LatLon' gridType + return met_data, gridInfo + + else: + return -99999, -99999 + +########### +def getGridInfo(source,gridType): + + if gridType == 'LatLon': + latDef = griddedDatasets[source]['latDef'] + lonDef = griddedDatasets[source]['lonDef'] + gridInfo = { + 'type': gridType, + 'name': source, + 'lat_ll': latDef[0], #-90.000, + 'lon_ll': lonDef[0], #-180.000, + 'delta_lat': latDef[1], #0.5000, + 'delta_lon': lonDef[1], #0.625, + 'Nlat': latDef[2], #361, + 'Nlon': lonDef[2], #576, + } + elif gridType == 'Gaussian': + gridInfo = { + 'type': gridType, + 'name': source, + 'nx': griddedDatasets[source]['nx'], + 'ny': griddedDatasets[source]['ny'], + 'lon_zero': griddedDatasets[source]['lon_zero'], + } + + return gridInfo + +def getAttrArray(source,variable,initTime,validTime): + + init = dt.datetime.strptime(initTime,"%Y%m%d%H") + valid = dt.datetime.strptime(validTime,"%Y%m%d%H") + lead, rem = divmod((valid-init).total_seconds(), 3600) + + attrs = { + + 'valid': valid.strftime("%Y%m%d_%H%M%S"), + 'init': init.strftime("%Y%m%d_%H%M%S"), + 'lead': str(int(lead)), + 'accum': '000000', + + 'name': variable, #'MERRA2_Cloud_Percentage' + 'long_name': variable, #'Cloud Percentage Levels', + 'level': 'ALL', + 'units': verifVariables[variable]['units'], + + 'grid': getGridInfo(source,griddedDatasets[source]['gridType']) + } + + #print(attrs) + #print(griddedDatasets[source]) + + return attrs + +######## END FUNCTIONS ########## + + +#if __name__ == "__main__": +dataFile, dataSource, variable, i_date, v_date, flag = sys.argv[1].split(":") +met_data = getDataArray(dataFile,dataSource,variable,flag) +attrs = getAttrArray(dataSource,variable,i_date,v_date) +print(attrs) From 057d4a8a62cb686892e01bcffec54b89beea5cb3 Mon Sep 17 00:00:00 2001 From: j-opatz <59586397+j-opatz@users.noreply.github.com> Date: Tue, 6 Jun 2023 12:15:36 -0600 Subject: [PATCH 5/6] Update GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py --- .../GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py index 9c389e9566..8e9e931ce1 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py @@ -48,7 +48,7 @@ # # | **Init:** 2022-07-03 12Z # | **Forecast lead:** 36 hour -# | +# # Because instance names are used, GridStat will run 3 times for this 1 initalization time. 
############################################################################## From f619ce9d71f30cfc1f933a2b2734af2f9b6a8701 Mon Sep 17 00:00:00 2001 From: j-opatz Date: Tue, 6 Jun 2023 12:35:07 -0600 Subject: [PATCH 6/6] removed chars, added space to last section --- .../GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py | 1 + .../GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py | 3 ++- .../GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py | 3 ++- .../GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py | 3 ++- .../GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py | 3 ++- .../GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py | 3 ++- 6 files changed, 11 insertions(+), 5 deletions(-) diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py index 8e9e931ce1..26c14bafb6 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.py @@ -135,3 +135,4 @@ # Navigate to the :ref:`quick-search` page to discover other similar use cases. # # sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstGFS_obsERA5_lowAndTotalCloudFrac.png' +# diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py index 37228aaadc..9ea7468677 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.py @@ -48,7 +48,7 @@ # # | **Init:** 2021-07-03 12Z # | **Forecast lead:** 36 hour -# | +# # Because instance names are used, GridStat will run 3 times for this 1 initalization time. ############################################################################## @@ -135,3 +135,4 @@ # Navigate to the :ref:`quick-search` page to discover other similar use cases. # # sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstGFS_obsMERRA2_lowAndTotalCloudFrac.png' +# diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py index 90c27878f9..918e4a8f98 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.py @@ -48,7 +48,7 @@ # # | **Init:** 2022-07-03 12Z # | **Forecast lead:** 36 hour -# | +# # Because instance names are used, GridStat will run 2 times for this 1 initalization time. ############################################################################## @@ -134,3 +134,4 @@ # Navigate to the :ref:`quick-search` page to discover other similar use cases. 
# # sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstGFS_obsSATCORPS_cloudTopPressAndTemp.png' +# diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py index 58f94a519b..f50a0be14a 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsERA5_cloudBaseHgt.py @@ -47,7 +47,7 @@ # # | **Init:** 2020-07-23 00Z # | **Forecast lead:** 36 hour -# | +# # Because instance names are used, GridStat will run 2 times for this 1 initalization time. ############################################################################## @@ -132,3 +132,4 @@ # Navigate to the :ref:`quick-search` page to discover other similar use cases. # # sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstMPAS_obsERA5_cloudBaseHgt.png' +# diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py index 1c37c29e05..a3587f7a19 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.py @@ -47,7 +47,7 @@ # # | **Init:** 2020-07-23 00Z # | **Forecast lead:** 36 hour -# | +# # Because instance names are used, GridStat will run 3 times for this 1 initalization time. ############################################################################## @@ -134,3 +134,4 @@ # Navigate to the :ref:`quick-search` page to discover other similar use cases. # # sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstMPAS_obsMERRA2_lowAndTotalCloudFrac.png' +# diff --git a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py index a6905bb2bd..67dcb7e67e 100644 --- a/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py +++ b/docs/use_cases/model_applications/air_quality_and_comp/GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.py @@ -47,7 +47,7 @@ # # | **Init:** 2020-07-23 00Z # | **Forecast lead:** 36 hour -# | +# # Because instance names are used, GridStat will run 3 times for this 1 initalization time. ############################################################################## @@ -134,3 +134,4 @@ # Navigate to the :ref:`quick-search` page to discover other similar use cases. # # sphinx_gallery_thumbnail_path = '_static/air_quality_and_comp-GridStat_fcstMPAS_obsSATCORPS_lowAndTotalCloudFrac.png' +#
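
The read_input_data.py script added in this patch follows MET's Python embedding convention for gridded data: it leaves a 2-D NumPy array named met_data and a dictionary named attrs defined at module level, and it takes a single colon-separated argument of six fields (dataFile:dataSource:variable:initTime:validTime:flag), with the init and valid times in YYYYMMDDHH form (see getAttrArray). The sketch below shows one way the script could be exercised standalone while debugging; every path and field value is a placeholder rather than a value from this use case, and dataSource/variable would need to be keys of the script's griddedDatasets/verifVariables dictionaries.

    # Standalone smoke test of read_input_data.py (all values are placeholders)
    import subprocess

    arg = ":".join([
        "/path/to/input_file.nc",   # dataFile   (placeholder path)
        "MPAS",                     # dataSource (assumed key of griddedDatasets)
        "totalCloudFrac",           # variable   (assumed key of verifVariables)
        "2020072300",               # init time,  YYYYMMDDHH
        "2020072412",               # valid time, YYYYMMDDHH
        "fcst",                     # flag forwarded to getDataArray (placeholder)
    ])
    subprocess.run(["python3", "read_input_data.py", arg], check=True)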