diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index 8fb804771c..a7b11c5362 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -5,50 +5,84 @@ model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf """ +############################################################################## +# .. contents:: +# :depth: 1 +# :local: +# :backlinks: none + ############################################################################## # Scientific Objective # -------------------- -# This use case ingests two CESM (CAM and CLM) files and a new FLUX2015 dataset (NETCDF) that Paul Dirmeyer, GMU prepared from FLUXNET2015 ASCII dataset. -# The use case will calculate Terrestrial Coupling Index (TCI) from CESM datasets. -# Utilizing Python embedding, this use case taps into a new vital observation dataset and compares it to CESM simulations TCI forecast. -# Finally, it will generate plots of model TCI and observed TCI. +# This use case ingests two CESM (CAM and CLM) files and raw FLUXNET2015 data. +# The use case calculates the Terrestrial Coupling Index (TCI) from the CESM forecasts and FLUXNET observations. +# Utilizing Python embedding, this use case taps into a new vital observation dataset and compares it to CESM forecasts of TCI. +# Finally, it will generate plots of model forecast TCI overlaid with TCI observations at FLUXNET sites. +# +# The reference for the Terrestrial Coupling Index calculation is as follows: +# +# Dirmeyer, P. A., 2011: The terrestrial segment of soil moisture-climate coupling. *Geophys. Res. Lett.*, **38**, L16702, doi: 10.1029/2011GL048268. +# ############################################################################## # Datasets # --------------------- # -# | **Forecast:** CESM 1979-1983 Simulations a. Community Land Model (CLM) and b. Community Atmosphere Model (CAM) -# -# | **Observations:** FLUXNET2015 post processed and converted to NETCDF. This data includes data collected from multiple regional flux towers. +# | **Forecast:** CESM 1979-1983 Simulations +# | * Community Land Model (CLM) file +# | * Community Atmosphere Model (CAM) file # +# | **Observations:** Raw FLUXNET2015 observations # -# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | **Location:** All of the input data required for this use case can be found in the land_surface sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases # | This tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. # -# | **Data Source:** CESM - CGD; FLUXNET2015 - Paul Dirmeyer +# | **Data Source:** CESM - NSF NCAR Climate & Global Dynamics (CGD); FLUXNET2015 "SUBSET" Data Product: https://fluxnet.org/data/fluxnet2015-dataset/subset-data-product/ +# + +############################################################################## +# Python Dependencies +# --------------------- +# +# This use case requires the following Python dependencies:: +# +# * Xarray +# * Pandas +# * METcalcpy 3.0.0+ +# ############################################################################## # METplus Components # ------------------ # # This use case utilizes the METplus PyEmbedIngest to read the CESM files and calculate TCI using python embedding and a NETCDF file of the TCI is generated. -# The METplus PointStat processes the output of PyEmbedIngest and FLUXNET2015 dataset (using python embedding), and outputs the requested line types. -# Then METplus PlotPointObs tool reads the output of PyEmbedIngest and FLUXNET2015 dataset and produce plots of TCI from CESM and point observations. +# The METplus PointStat processes the output of PyEmbedIngest and FLUXNET2015 dataset (using Python embedding), and outputs the requested line types. +# Then the METplus PlotPointObs tool reads the output of PyEmbedIngest and FLUXNET2015 dataset and produce plots of TCI from CESM and point observations. # A custom loop runs through all the pre-defined seasons (DJF, MAM, JJA, SON) and runs PyEmbedIngest, PointStat, and PlotPointObs. +# ############################################################################## # METplus Workflow # ---------------- # -# The PyEmbedIngest tool reads 2 CESM files containing Soil Moisture and Sensible Heat Flux, each composed of daily forecasts from -# 1979 to 1983 and calculates TCI and generates a NETCDF file of the TCI. One FLUXNET2015 NETCDF file containing station observations -# of several variables including Coupling Index of Soil Moisture and Sensible Heat Flux is read by Python Embedding. -# +# The PyEmbedIngest tool reads 2 CESM files containing Soil Moisture (CLM file) and Sensible Heat Flux (CAM file), each composed of daily forecasts from +# 1979 to 1983 and calculates TCI and generates a NETCDF file of the TCI. Raw CSV files containing FLUXNET station observations of latent heat flux (LE_F_MDS) +# and soil water content at the shallowest level (SWC_F_MDS_1) are read using Python embedding, and TCI is computed. +# # | **Valid Beg:** 1979-01-01 at 00z # | **Valid End:** 1979-01-01 at 00z -# +# # PointStat is used to compare the two new fields (TCI calculated from CESM dataset and FLUXNET2015). -# Finally, PlotPointObs is run to plot the CESM TCI overlaying the FLUXNET2015 point observations. +# Finally, PlotPointObs is run to plot the CESM TCI overlaying the FLUXNET2015 point observations. +# +# .. note:: +# +# The CESM forecasts cover a time period prior to the availability of FLUXNET observations. Thus, +# this use case should be considered a demonstration of the capability to read CESM forecast data, +# raw FLUXNET observation data, and compute TCI, rather than a bonafide scientific application. +# The use case is designed to enforce seasonal alignment, but it is not designed to enforce date/time alignment. +# In this case, the CESM data cover 1979-1983, whereas the sample FLUXNET observations cover varying time ranges depending on the site. +# ############################################################################## # METplus Configuration @@ -76,18 +110,107 @@ # Python Embedding # ---------------- # -# This use case uses a Python embedding script to read input data +# This use case uses a Python embedding script to read both the forecast and observation data, in order to compute TCI, +# which is the diagnostic that is being verified by MET using PointStat. The CESM forecast data is read using: # # parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py # # .. highlight:: python # .. literalinclude:: ../../../../parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py # +# The user can control all arguments to this script via the METplus use case configuration file using the following config entries: +# +# .. glossary:: +# +# CESM_CAM_VAR +# The CESM Community Atmosphere Model variable to use for computing TCI +# +# | *Default:* LHFLX +# +# CESM_CLM_VAR +# The CESM Community Land Model variable to use for computing TCI +# +# | *Default:* SOILWATER_10CM +# +# CESM_CAM_FILE_PATH +# The absolute path to the CESM Community Atmosphere Model netcdf file +# +# | *Default:* +# +# CESM_CLM_FILE_PATH +# The absolute path to the CESM Community Land Model netcdf file +# +# | *Default:* +# +# The raw FLUXNET2015 SUBSET data are read using: +# # parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py # # .. highlight:: python # .. literalinclude:: ../../../../parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py # +# The user can control all command line arguments to this script via METplus config entries: +# +# .. glossary:: +# +# FLUXNET_DATA_DIR +# The directory containing raw FLUXNET CSV files +# +# | *Default:* +# +# FLUXNET_LAT_HEAT_VAR +# The FLUXNET surface latent heat flux variable name to use for computing TCI +# +# | *Default:* LE_F_MDS +# +# FLUXNET_SOIL_MOIST_VAR +# The FLUXNET soil moisture variable name to use for computing TCI +# +# | *Default:* SWC_F_MDS_1 +# +# FLUXNET_OBS_METADATA_FILE +# The absolute path to the fluxnetstations.csv metadata file included with the use case +# +# | *Default:* +# +# and for data filtering options, via METplus config entries: +# +# .. glossary:: +# +# FLUXNET_SKIP_LEAP_DAYS +# Skip FLUXNET observations from 29 February days +# +# | *Default:* True +# +# FLUXNET_HIGHRES_QC_THRESH +# The fraction of higher temporal resolution FLUXNET data required to have +# passed quality control in order to use the daily data. +# +# | *Default:* 0.8 +# +# FLUXNET_MIN_DAYS_PER_SEASON +# The minimum number of days to include in individual seasons at each site +# +# | *Default:* 1 +# +# FLUXNET_MIN_DAYS_PER_SITE +# The minimum number of days for all seasons at each site +# +# | *Default:* 10 +# +# FLUXNET_RAW_FILENAME_PATTERN +# A filename pattern matching the template of the raw FLUXNET CSV files +# +# | *Default:* FLX_*_DD_*.csv +# +# FLUXNET_DEBUG +# Turn on additional print statements from the Python embedding script +# +# | *Default:* False +# +# Both of the above Python embedding scripts compute TCI using the ``calc_tci()`` function in METcalcpy. See the METcalcpy +# documentation for more information: https://metcalcpy.readthedocs.io/en/latest/index.html. +# ############################################################################## # Running METplus @@ -99,6 +222,7 @@ # run_metplus.py /path/to/METplus/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf /path/to/user_system.conf # # See :ref:`running-metplus` for more information. +# ############################################################################## # Expected Output diff --git a/internal/tests/use_cases/all_use_cases.txt b/internal/tests/use_cases/all_use_cases.txt index 8aed023e48..daef1a124e 100644 --- a/internal/tests/use_cases/all_use_cases.txt +++ b/internal/tests/use_cases/all_use_cases.txt @@ -88,7 +88,7 @@ Category: data_assimilation Category: land_surface -0::PointStat_fcstCESM_obsFLUXNET2015_TCI:: model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf:: py_embed +0::PointStat_fcstCESM_obsFLUXNET2015_TCI:: model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf:: metplotpy_env, py_embed Category: marine_and_cryosphere diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf index 4fe0905888..7ed802462f 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf @@ -40,107 +40,93 @@ CUSTOM_LOOP_LIST = DJF,MAM,JJA,SON LOG_LEVEL=DEBUG ############################################################ -# PyEmbedIngest Settings +# PyEmbedIngest Settings for CESM forecast data # https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#pyembedingest +# User can modify which flux and soil variable is extracted from the CESM files ############################################################ +CESM_CAM_VAR = LHFLX +CESM_CLM_VAR = SOILWATER_10CM +CESM_CAM_FILE_PATH = {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fcst/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.cam.h1.1979-83_CIvars.nc +CESM_CLM_FILE_PATH = {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fcst/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.clm2.h1.1979-83_SoilWater10cm.nc + PY_EMBED_INGEST_1_OUTPUT_DIR = PY_EMBED_INGEST_1_OUTPUT_TEMPLATE = {OUTPUT_BASE}/regrid_data_plane_{custom}.nc -############################################################ -# The last argument is the Latent Heat Flux. -# User can change it to use any other variable present in CESM files. -############################################################ - -PY_EMBED_INGEST_1_SCRIPT = {PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.clm2.h1.1979-83_SoilWater10cm.nc {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.cam.h1.1979-83_CIvars.nc {custom} LHFLX +PY_EMBED_INGEST_1_SCRIPT = {PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py {CESM_CAM_FILE_PATH} {CESM_CAM_VAR} {CESM_CLM_FILE_PATH} {CESM_CLM_VAR} {custom} PY_EMBED_INGEST_1_TYPE = NUMPY -#PY_EMBED_INGEST_1_OUTPUT_GRID = G129 PY_EMBED_INGEST_1_OUTPUT_GRID = "latlon 288 192 -90.0 -0.0 0.9375 1.25" ############################################################ -# PointStat Settings -# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#pointstat +# Python embedding settings for using FLUXNET2015 obs data with PointStat. +# User can change the variables used from FLUXNET here to compute TCI. +# The FLUXNET_* config items are also used for PlotPointObs below. ############################################################ +FLUXNET_DATA_DIR = {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/obs +FLUXNET_LAT_HEAT_VAR = LE_F_MDS +FLUXNET_SOIL_MOIST_VAR = SWC_F_MDS_1 +FLUXNET_OBS_METADATA_FILE={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnetstations.csv -FCST_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY +OBS_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py {FLUXNET_DATA_DIR} {FLUXNET_LAT_HEAT_VAR} {FLUXNET_SOIL_MOIST_VAR} {custom} {FLUXNET_OBS_METADATA_FILE} ############################################################ -# The default variables for the FLUXNET2015 observations id 10CM Soil Moisture -# and Latent Heat Flux. User can change them in the python Embedding script -# fluxnet_tci.py in the PointStat_fcstCESM_obsFLUXNET2015_TCI -# directory. +# PointStat Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#pointstat ############################################################ -OBS_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/F2015_LoCo_AllChains_F2015.nc4 {custom} - -POINT_STAT_OUTPUT_DIR = {OUTPUT_BASE}/PointStat ############################################################ # Field Info # https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#field-info ############################################################ -POINT_STAT_ONCE_PER_FIELD = False - -FCST_POINT_STAT_INPUT_DIR = {OUTPUT_BASE}/ -FCST_POINT_STAT_INPUT_TEMPLATE = regrid_data_plane_{custom}.nc -FCST_POINT_STAT_VAR1_LEVELS = OBS_POINT_STAT_VAR1_NAME = TCI -OBS_POINT_STAT_VAR1_LEVELS = L0 +OBS_POINT_STAT_VAR1_LEVELS = L0 +OBS_POINT_STAT_WINDOW_BEGIN = -1000000000 +OBS_POINT_STAT_WINDOW_END = 2000000000 -FCST_POINT_STAT_VAR1_NAME = TCI_10cm_soil_depth +FCST_POINT_STAT_VAR1_NAME = TCI_10cm_soil_depth FCST_POINT_STAT_VAR1_LEVELS = Z10 -BOTH_POINT_STAT_VAR1_THRESH = +FCST_POINT_STAT_INPUT_DIR = {OUTPUT_BASE} +FCST_POINT_STAT_INPUT_TEMPLATE = regrid_data_plane_{custom}.nc ############################################################ -# PointStat Settings -# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#pointstat +# General PointStat Settings ############################################################ -LOG_POINT_STAT_VERBOSITY = 2 - -POINT_STAT_CONFIG_FILE ={PARM_BASE}/met_config/PointStatConfig_wrapped - +POINT_STAT_CONFIG_FILE = {PARM_BASE}/met_config/PointStatConfig_wrapped +POINT_STAT_DESC = TCI POINT_STAT_INTERP_TYPE_METHOD = NEAREST POINT_STAT_INTERP_TYPE_WIDTH = 1 - +POINT_STAT_MASK_GRID = FULL +POINT_STAT_MASK_POLY = +POINT_STAT_MASK_SID = +POINT_STAT_MESSAGE_TYPE = ADPSFC +POINT_STAT_OBS_VALID_BEG = 19790101_000000 +POINT_STAT_OBS_VALID_END = 20130101_000000 +POINT_STAT_OFFSETS = 0 +POINT_STAT_ONCE_PER_FIELD = False +POINT_STAT_OUTPUT_DIR = {OUTPUT_BASE}/PointStat POINT_STAT_OUTPUT_FLAG_CTC = BOTH POINT_STAT_OUTPUT_FLAG_MPR = BOTH POINT_STAT_OUTPUT_FLAG_CNT = BOTH - -OBS_POINT_STAT_WINDOW_BEGIN = -1000000000 -OBS_POINT_STAT_WINDOW_END = 2000000000 - -POINT_STAT_OFFSETS = 0 - -MODEL = CESM - -POINT_STAT_DESC = TCI -OBTYPE = - +POINT_STAT_OUTPUT_PREFIX = {custom} POINT_STAT_REGRID_TO_GRID = NONE POINT_STAT_REGRID_METHOD = BILIN POINT_STAT_REGRID_WIDTH = 2 -POINT_STAT_OBS_VALID_BEG = 19790101_000000 -POINT_STAT_OBS_VALID_END = 20130101_000000 - -POINT_STAT_MASK_GRID = FULL -POINT_STAT_MASK_POLY = -POINT_STAT_MASK_SID = - -POINT_STAT_MESSAGE_TYPE = ADPSFC - -POINT_STAT_OUTPUT_PREFIX = {custom} +LOG_POINT_STAT_VERBOSITY = 2 +MODEL = CESM +OBTYPE = FLUXNET ############################################################ # PlotPointObs Settings # https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#plotpointobs ############################################################ -PLOT_POINT_OBS_INPUT_TEMPLATE =PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/F2015_LoCo_AllChains_F2015.nc4 {custom} +PLOT_POINT_OBS_INPUT_TEMPLATE =PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py {FLUXNET_DATA_DIR} {FLUXNET_LAT_HEAT_VAR} {FLUXNET_SOIL_MOIST_VAR} {custom} {FLUXNET_OBS_METADATA_FILE} -PLOT_POINT_OBS_GRID_INPUT_DIR = {OUTPUT_BASE}/ +PLOT_POINT_OBS_GRID_INPUT_DIR = {OUTPUT_BASE} PLOT_POINT_OBS_GRID_INPUT_TEMPLATE = regrid_data_plane_{custom}.nc PLOT_POINT_OBS_OUTPUT_DIR = {OUTPUT_BASE}/plot_point_obs @@ -170,3 +156,13 @@ PLOT_POINT_OBS_POINT_DATA = } } +############################################################ +# FLUXNET Python Embedding Script options +############################################################ +[user_env_vars] +FLUXNET_SKIP_LEAP_DAYS=True +FLUXNET_HIGHRES_QC_THRESH=0.8 +FLUXNET_MIN_DAYS_PER_SEASON=1 +FLUXNET_MIN_DAYS_PER_SITE_ALL_SEASONS=10 +FLUXNET_RAW_FILENAME_PATTERN=FLX_*_DD_*.csv +FLUXNET_DEBUG=False diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py index b7a97bdc97..5b69bcd693 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py @@ -6,114 +6,111 @@ from two CESM files User needs to provide the season (DJF, MAM, JJA, or SON) in the METplus conf file User can change the variables to compute TCI -""" - -import numpy as np -import xarray as xr +Modified Nov 2023 +""" +import metcalcpy.diagnostics.land_surface as land_sfc +import os import pandas as pd -import datetime -import time import sys -import os -import netCDF4 as nc - -if len(sys.argv) < 4: - print("Must specify the following elements: sfc_flux_file soil_file season (DJF, MAM, JJA, or SON)") - sys.exit(1) - -fileCLM = os.path.expandvars(sys.argv[1]) -fileCAM = os.path.expandvars(sys.argv[2]) -season = sys.argv[3] -var_y = sys.argv[4] - -print('Starting Terrestrial Coupling Index Calculation for: ',season) - -camDS_CLM45 = xr.open_dataset(fileCAM, decode_times=False) -print('Finished reading in CAM file') -clmDS_CLM45 = xr.open_dataset(fileCLM, decode_times=False) -print('Finished reading in CLM file') - -if season=="DJF": - ss = 0 -elif season=="MAM": - ss = 1 -elif season=="JJA": - ss = 2 -elif season=="SON": - ss = 3 -else: - sys.exit('ERROR : URECOGNIZED SEASON, PLEASE USE DJF, MAM, JJA, OR SON') - -units, reference_date = camDS_CLM45.time.attrs['units'].split('since') -camDS_CLM45['time'] = pd.date_range(start=reference_date, periods=camDS_CLM45.sizes['time'], freq='D') -camDS_time=camDS_CLM45.time[0] -dt_object = datetime.datetime.utcfromtimestamp(camDS_time.values.tolist()/1e9) -vDate=dt_object.strftime("%Y%m%d") - - -units, reference_date = clmDS_CLM45.time.attrs['units'].split('since') -clmDS_CLM45['time'] = pd.date_range(start=reference_date, periods=clmDS_CLM45.sizes['time'], freq='D') - -ds = camDS_CLM45 -ds['SOILWATER_10CM'] = (('time','lat','lon'), clmDS_CLM45.SOILWATER_10CM.values) - -xname = 'SOILWATER_10CM' # Controlling variable -#yname = 'LHFLX' # Responding variable -yname=str(var_y) - -xday = ds[xname].groupby('time.season') -yday = ds[yname].groupby('time.season') - -# Get the covariance of the two (numerator in coupling index) -covarTerm = ((xday - xday.mean()) * (yday - yday.mean())).groupby('time.season').sum() / xday.count() - -# Now compute the coupling index -couplingIndex = covarTerm/xday.std() -ci_season=couplingIndex[ss,:,:] - -ci = np.where(np.isnan(ci_season), -9999, ci_season) - -met_data = ci[:,:] -met_data = met_data[::-1].copy() - -#trim the lat/lon grids so they match the data fields -lat_met = camDS_CLM45.lat -lon_met = camDS_CLM45.lon -print(" Model Data shape: "+repr(met_data.shape)) -v_str = vDate -v_str = v_str + '_000000' -#print(" Valid date: "+v_str) -lat_ll = float(lat_met.min()) -lon_ll = float(lon_met.min()) -n_lat = lat_met.shape[0] -n_lon = lon_met.shape[0] -delta_lat = (float(lat_met.max()) - float(lat_met.min()))/float(n_lat) -delta_lon = (float(lon_met.max()) - float(lon_met.min()))/float(n_lon) - -print(f"variables:" - f"lat_ll: {lat_ll} lon_ll: {lon_ll} n_lat: {n_lat} n_lon: {n_lon} delta_lat: {delta_lat} delta_lon: {delta_lon}") - -attrs = { - 'valid': v_str, - 'init': v_str, - 'lead': "000000", - 'accum': "000000", - 'name': 'TCI', - 'standard_name': 'terrestrial_coupling_index', - 'long_name': 'terrestrial_coupling_index', - 'level': "10cm_soil_depth", - 'units': "W/m2", - - 'grid': { - 'type': "LatLon", - 'name': "CESM Grid", - 'lat_ll': lat_ll, - 'lon_ll': lon_ll, - 'delta_lat': delta_lat, - 'delta_lon': delta_lon, - 'Nlat': n_lat, - 'Nlon': n_lon, - } - } +import xarray as xr + +# The script expects five arguments: +# sfc_flux_file = Latent Heat Flux file (from CAM, CESM "community atmosphere model") +# sfc_flux_varname = Field name in sfc_flux_file to use when computing TCI +# soil_file = Soil Temperature file (from CLM, CESM "community land model") +# soil_varname = Field name in soil_file to use when computing TCI +# season = "DJF", "MAM", "JJA", or "SON" +if len(sys.argv) < 6: + print("Must specify the following elements: ") + print("sfc_flux_file") + print("sfc_flux_varname") + print("soil_file") + print("soil_varname") + print("season (DJF, MAM, JJA, or SON)") + sys.exit(1) + +# Store command line arguments +fileCAM = os.path.expandvars(sys.argv[1]) +sfc_flux_varname = varCAM = sys.argv[2] +fileCLM = os.path.expandvars(sys.argv[3]) +soil_varname = varCLM = sys.argv[4] +season = sys.argv[5] +if season not in ['DJF','MAM','JJA','SON']: + print("ERROR: UNRECOGNIZED SEASON IN tci_from_cesm.py") + sys.exit(1) + +print("Starting Terrestrial Coupling Index Calculation for: "+season) +dsCLM = xr.open_dataset(fileCLM, decode_times=False) +dsCAM = xr.open_dataset(fileCAM, decode_times=False) + +# The same shape (dimensions/coordinates) are assumed between the CLM and CAM files +# However, some metadata differences prevent using Xarray functions to combine the fields. +# Thus, we extract the values from one Xarray object and attach it to the other. +dsCAM[soil_varname] = xr.DataArray(dsCLM[soil_varname].values,dims=['time','lat','lon'],coords=dsCAM.coords) +ds = dsCAM +print("Finished reading CAM and CLM files with Xarray.") + +# Add a Pandas date range to subset by season +time_units, reference_date = ds.time.attrs['units'].split('since') +if time_units.strip() not in ['D','days','Days','DAYS']: + print("ERROR: TIME UNITS EXPECTED TO BE DAYS IN tci_from_cesm.py") + sys.exit(1) +else: + ds['time'] = pd.date_range(start=reference_date, periods=ds.sizes['time'], freq='D') + +# Group the dataset by season and subset to the season the user requested +szn = ds.groupby('time.season')[season] + +# Use the shared coupling index function to compute the index +couplingIndex = land_sfc.calc_tci(szn[soil_varname],szn[sfc_flux_varname]) + +# Prepare for MET +# 1. Replace missing data with the MET missing data values +couplingIndex = xr.where(couplingIndex.isnull(),-9999.,couplingIndex) + +# 2. Pull out the values into a NumPy object +met_data = couplingIndex.values + +# 3. Reverse the ordering of met_data (flip the grid along the equator) +met_data = met_data[::-1] + +# 4. Create a time variable formatted the way MET expects +time_var = ds.time.dt.strftime('%Y%m%d_%H%M%S').values[0] + +# 5. Create grid information. The CESM is on a lat/lon projection. +# lower left corner latitude +lat_ll = ds.lat.min().values +# lower left corner longitude +lon_ll = ds.lon.min().values +# number of latitude +n_lat = ds.sizes['lat'] +# number of longitude +n_lon = ds.sizes['lon'] +# latitude grid spacing +delta_lat = (ds.lat.max().values-lat_ll)/n_lat +# longitude grid spacing +delta_lon = (ds.lon.max().values-lon_ll)/n_lon + +# 6. Create a dictionary for the LatLon grid and required attributes +grid_attrs = {'type': 'LatLon', + 'name': 'CESM Grid', + 'lat_ll': float(lat_ll), + 'lon_ll': float(lon_ll), + 'delta_lat': float(delta_lat), + 'delta_lon': float(delta_lon), + 'Nlat': int(n_lat), + 'Nlon': int(n_lon)} + +# 7. Populate the attributes dictionary for MET +attrs = {'valid': time_var, + 'init': time_var, + 'lead': "000000", + 'accum': "000000", + 'name': 'TCI', + 'standard_name': 'terrestrial_coupling_index', + 'long_name': 'terrestrial_coupling_index', + 'level': '10cm_soil_depth', + 'units': 'W/m2', + 'grid': grid_attrs} diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index 1a2209aab6..23e1c696c0 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -4,74 +4,285 @@ User can change the verifying variable """ - -import numpy -import sys -import os +import datetime import glob +import metcalcpy.diagnostics.land_surface as land_sfc import numpy as np -import xarray as xr -import datetime -from datetime import date, timedelta import pandas as pd +import os +import sys +import xarray as xr +# Extra debugging +DEBUG = os.getenv('FLUXNET_DEBUG',False) +DEBUG = bool(DEBUG) -if len(sys.argv) < 2: - print("Must specify the following elements: FLUXNET2015_file season") - sys.exit(1) +# Climate model data typically doesn't include leap days, so it is excluded from observations by default +SKIP_LEAP = os.getenv('FLUXNET_SKIP_LEAP_DAYS',True) +SKIP_LEAP = bool(SKIP_LEAP) + +# For the finer resolution data, what fraction at the finer resoultion should pass QC to use the daily value? +DAILY_QC_THRESH = os.getenv('FLUXNET_HIGHRES_QC_THRESH',0.8) +DAILY_QC_THRESH = float(DAILY_QC_THRESH) + +# Number of days in an individual season to require +MIN_DAYS_SEASON = os.getenv('FLUXNET_MIN_DAYS_PER_SEASON',1) +MIN_DAYS_SEASON = int(MIN_DAYS_SEASON) -obsfile = os.path.expandvars(sys.argv[1]) -season = sys.argv[2] +# For all seasons (i.e. DJF 2000 + DJF 2001 ... DJF XXXX) in the analysis period, how many total days per site? +MIN_DAYS_SITE = os.getenv('FLUXNET_MIN_DAYS_PER_SITE_ALL_SEASONS',10) +MIN_DAYS_SITE = int(MIN_DAYS_SITE) -f2015data = xr.open_dataset(obsfile, decode_times=False) +# Pattern to use for searching for fluxnet files +#FILENAME_PATTERN = os.getenv('FLUXNET_RAW_FILENAME_PATTERN','AMF_*_DD_*.csv') +FILENAME_PATTERN = os.getenv('FLUXNET_RAW_FILENAME_PATTERN','FLX_*_DD_*.csv') +FILENAME_PATTERN = str(FILENAME_PATTERN) -if season=="DJF": - ss = 0 -elif season=="MAM": - ss = 1 -elif season=="JJA": - ss = 2 -elif season=="SON": - ss = 3 +def get_season_start_end(s,refdate): + + if s=='DJF': + if refdate.strftime('%m') in ['01','02']: + start = datetime.datetime((refdate.year)-1,12,1,0,0,0) + end = datetime.datetime(refdate.year,3,1,0,0,0) + else: + start = datetime.datetime(refdate.year,12,1,0,0,0) + end = datetime.datetime((refdate.year)+1,3,1,0,0,0) + elif season=='MAM': + start = datetime.datetime(refdate.year,3,1,0,0,0) + end = datetime.datetime(refdate.year,6,1,0,0,0) + elif season=='JJA': + start = datetime.datetime(refdate.year,6,1,0,0,0) + end = datetime.datetime(refdate.year,9,1,0,0,0) + elif season=='SON': + start = datetime.datetime(refdate.year,9,1,0,0,0) + end = datetime.datetime(refdate.year,12,1,0,0,0) + + return start, end + +# The script expects five arguments: +# raw_fluxnet_dir = Full path to the directory where the raw FLUXNET files are stored +# sfc_flux_varname = Field name in the raw_fluxnet_file to use when computing TCI +# soil_varname = Field name in the raw_fluxnet_file to use when computing TCI +# season = "DJF", "MAM", "JAJ", or "SON +if len(sys.argv) < 5: + print("Must specify the following elements: ") + print("raw_fluxnet_dir") + print("sfc_flux_varname") + print("soil_varname") + print("season (DJF, MAM, JJA, or SON)") + print("fluxnet_station_metadata_file") + sys.exit(1) + +# Store command line arguments +fndir = os.path.expandvars(sys.argv[1]) +sfc_flux_varname = varCAM = sys.argv[2] +sfc_qc = sfc_flux_varname+'_QC' +soil_varname = varCLM = sys.argv[3] +soil_qc = soil_varname+'_QC' +season = sys.argv[4] +fluxnetmeta = sys.argv[5] +if season not in ['DJF','MAM','JJA','SON']: + print("ERROR: UNRECOGNIZED SEASON IN fluxnet2015_tci.py") + sys.exit(1) + +# Dictionary mapping of which months go with which seasons +smap = {'DJF':[12,1,2],'MAM':[3,4,5],'JJA':[6,7,8],'SON':[9,10,11]} + +# Read station information from static file, because the raw FLUXNET data does not contain +# required metadata like station latitude/longitude that is required by MET. +if not os.path.exists(fluxnetmeta): + print("ERROR! FLUXNET METADATA FILE NOT PRESENT.") + sys.exit(1) else: - print('Unrecognized season, please use DJF, MAM, JJA, SON') - exit() - -start_year=f2015data['Start year'].values.tolist() -end_year=f2015data['End year'].values.tolist() -vld=pd.to_datetime(start_year,format='%Y') -vld=vld.strftime("%Y%m%d") -vld=vld+ '_000000' - -sid=f2015data['Station'].values.tolist() -print("Length :",len(sid)) -print("SID :",sid) - -lat=f2015data['Latitude'].values.tolist() -lon=f2015data['Longitude'].values.tolist() -# User can change the name of the variable below -obs=f2015data['CI Sfc_SM Latent_Heat'].values.tolist() -obs=np.array(obs) -obs=obs[:,ss] - - -#create dummy lists for the message type, elevation, variable name, level, height, and qc string -#numpy is more efficient at creating the lists, but need to convert to Pythonic lists -typ = np.full(len(sid),'ADPSFC').tolist() -elv = np.full(len(sid),10).tolist() -var = np.full(len(sid),'TCI').tolist() -lvl = np.full(len(sid),10).tolist() -hgt = np.zeros(len(sid),dtype=int).tolist() -qc = np.full(len(sid),'NA').tolist() -obs = np.where(np.isnan(obs), -9999, obs) -obs = np.full(len(sid),obs).tolist() -vld = np.full(len(sid),vld).tolist() - -l_tuple = list(zip(typ,sid,vld,lat,lon,elv,var,lvl,hgt,qc,obs)) -point_data = [list(ele) for ele in l_tuple] - -#print("Data Length:\t" + repr(len(point_data))) -#print("Data Type:\t" + repr(type(point_data))) - -#print(" Point Data Shape: ",np.shape(point_data)) -print(" Point Data: ",point_data) + sd = pd.read_csv(fluxnetmeta) + +print("Starting Terrestrial Coupling Index Calculation for: "+season) +# Locate files at the input directory +if not os.path.exists(fndir): + print("ERROR: FLUXNET INPUT DIRECTORY DOES NOT EXIST.") + sys.exit(1) +else: + fn_file_list = glob.glob(os.path.join(fndir,FILENAME_PATTERN)) + fn_stations = [os.path.basename(x).split('_')[1] for x in fn_file_list] + if fn_stations == []: + print("ERROR! NO FLUXNET DATA FOUND MATCHING FILE PATTERN "+FILENAME_PATTERN) + sys.exit(1) + else: + if DEBUG: + print("FOUND FLUXNET FILES FOR STATIONS:") + print(fn_stations) + else: + print("fluxnet2015_tci.py INFO: FOUND DATA FOR %04d FLUXNET SITES." % (int(len(fn_stations)))) + + # Let's try using a dictionary where the key is the site name and the value is the site file + # in an effort to keep the site ID's and filenames aligned for now + file_info = {station:file for station,file in tuple(zip(fn_stations,fn_file_list))} + +# Loop over all stations we have data for and ensure we have the required metadata +# and required variables in the data file. +# If we don't have the metadata or required variables, +# remove the station and file from the analysis +dflist = [] +discard = [] +allfiles = len(file_info) +for station,stationfile in file_info.items(): + if not sd['station'].astype('str').str.contains(station).any(): + if DEBUG: + print("WARNING! EXCLUDING SITE %s, NO METADATA FOUND IN fluxnetstations.csv" % (station)) + discard.append(station) + df = pd.read_csv(stationfile) + if (sfc_flux_varname in df.columns and soil_varname in df.columns and soil_qc in df.columns and sfc_qc in df.columns): + dflist.append(df) + else: + if DEBUG: + print("WARNING! EXCLUDING SITE %s, MISSING ONE OR MORE REQUIRED VARIABLES." % (station)) + discard.append(station) + +# Reset the file info +final_files = {station:stationfile for station,stationfile in file_info.items() if station not in discard} +print("fluxnet2015_tci.py INFO: DISCARDED %04d SITES DUE TO MISSING METADATA OR VARIABLES." % (int(allfiles-len(final_files)))) + +# Create a MET dataframe +metdf = pd.DataFrame(columns=['typ','sid','vld','lat','lon','elv','var','lvl','hgt','qc','obs']) +metdf['sid'] = final_files.keys() +metdf['typ'] = ['ADPSFC']*len(metdf) +metdf['elv'] = [10]*len(metdf) +metdf['lvl'] = [10]*len(metdf) +metdf['var'] = ['TCI']*len(metdf) +metdf['qc'] = ['NA']*len(metdf) +metdf['hgt'] = [0]*len(metdf) +metdf['lat'] = [sd[sd['station']==s]['lat'].values[0] for s in final_files.keys()] +metdf['lon'] = [sd[sd['station']==s]['lon'].values[0] for s in final_files.keys()] + +# Check and see what the length of metdf is here. If it is empty/zero, no FLUXNET data were found. +if len(metdf)==0: + print("ERROR! FOUND NO FLUXNET DATA FOR FILENAME_PATTERN AND METADATA PROVIDED. "+\ + "PLEASE RECONFIGURE AND TRY AGAIN.") + sys.exit(1) + +# Because the time record for each station is not the same, the dataframes cannot be merged. +# Since the goal is a single value for each site for all dates that fall in a season, +# the dataframes can remain separate. +for df,stn in tuple(zip(dflist,final_files.keys())): + + if DEBUG: + print("==============================") + print("PROCESSING STATION: %s" % (stn)) + + # Length of all data + alldays = len(df) + if DEBUG: + print("NUMBER OF DAYS AT THIS SITE: %04d" % (alldays)) + + # Do some checking for missing data. FLUXNET says that -9999 is usecd for missing data. + # Both the soil and surface variable must be present to compute TCI, so we only want + # to retain days where both are not missing. + df = df[(df[sfc_flux_varname]!=-9999.) & (df[soil_varname]!=-9999.)] + if DEBUG: + missdiff = int(alldays)-int(len(df)) + print("DISCARDED %04d DAYS WITH MISSING DATA (%3.2f %%)." % (int(alldays)-int(len(df)),((float(missdiff)/float(alldays))*100.0))) + + # Reset length of good data + alldays = len(df) + + # Only save data with quality above the threshold and reset the index + df = df[(df[sfc_qc].astype('float')>=DAILY_QC_THRESH)&(df[soil_qc].astype('float')>=DAILY_QC_THRESH)].reset_index() + if DEBUG: + print("DISCARDED %04d DAYS OF LOW QUALITY DATA FOR ALL SEASONS AT %s" % (int(alldays)-int(len(df)),stn)) + + # Print the number of days remaining after filtering + if DEBUG: + print("NUMBER OF DAYS AFTER FILTERING AT THIS SITE: %04d" % (alldays)) + + # Double check there's any valid data here + if len(df) <= 0: + if DEBUG: + print("WARNING! NO DATA LEFT AFTER QC FILTERING.") + metdf.loc[metdf['sid']==stn,'qc'] = '-9999' + continue + + # Add datetime column + df['datetime'] = pd.to_datetime(df['TIMESTAMP'],format='%Y%m%d') + + # Add a month column + df['month'] = df['datetime'].dt.strftime('%m') + + # Add a season column + df['season'] = [szn for mon in df['month'] for szn in smap if int(mon) in smap[szn]] + + # Add a station column + df['station_id'] = [stn]*len(df) + + # Subset by the requested season + df = df[df['season']==season] + if DEBUG: + print("TOTAL DAYS FOR %s AT THIS SITE: %04d" % (season,len(df))) + + # Double check there's any valid data here + if len(df) <= 0: + if DEBUG: + print("WARNING! NO DATA FOR REQUESTED SEASON.") + metdf.loc[metdf['sid']==stn,'qc'] = '-9999' + continue + + # If the season is DJF, remove leap days from February if requested + if season=='DJF' and SKIP_LEAP: + withleap = len(df) + df = df[~((df.datetime.dt.month == 2) & (df.datetime.dt.day == 29))] + if DEBUG: + print("REMOVED %03d LEAP DAYS." % (int(withleap)-int(len(df)))) + + # Get the start and end of the season of the first year + start, end = get_season_start_end(season,df['datetime'].iloc[0]) + + # We know the start and end date of the first season. We assume there is data in every season forward + # in time until we exceed the last date in the file. + badyrs = [] + limit = df['datetime'].iloc[-1] + while start <= limit: + year = end.strftime('%Y') + ndays = len(df[(df['datetime']>=start) & (df['datetime']<=(end-datetime.timedelta(days=1)))]) + if DEBUG: + print("FOR "+season+" ENDING %s FOUND %04d DAYS." % (year,ndays)) + + # Store the season ending years where there are not enough days + if ndays < MIN_DAYS_SEASON: + badyrs.append(year) + + # Move to the same season in the next consecutive year + start = datetime.datetime((start.year)+1,start.month,start.day,0,0,0) + end = datetime.datetime((end.year)+1,end.month,end.day,0,0,0) + + # Now actually remove the data we don't want to use + for year in badyrs: + start, end = get_season_start_end(season,datetime.datetime(int(year),1,1,0,0,0)) + if DEBUG: + print("REMOVING "+season+" ENDING %s" % (year)) + df = df[(df['datetime'](end-datetime.timedelta(days=1)))] + + # Double check there are sufficient days at this site for all seasons + if len(df)