From 3dcc2e61b9146b7fee7da69ff39da4dfcd462c5c Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Tue, 31 Oct 2023 13:41:28 -0600 Subject: [PATCH 01/31] Major refactor including elimination of unnecessary imports, only computing the index for the season requested instead of all seasons all the time, which also fixes a bug selecting which season the user requested. Results are identical for all seasons for the test data for the use case. --- .../cesm_tci.py | 219 +++++++++--------- 1 file changed, 112 insertions(+), 107 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py index b7a97bdc97..47551732dd 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py @@ -6,114 +6,119 @@ from two CESM files User needs to provide the season (DJF, MAM, JJA, or SON) in the METplus conf file User can change the variables to compute TCI -""" +Modified Nov 2023 +""" -import numpy as np -import xarray as xr +import os import pandas as pd -import datetime -import time import sys -import os -import netCDF4 as nc - -if len(sys.argv) < 4: - print("Must specify the following elements: sfc_flux_file soil_file season (DJF, MAM, JJA, or SON)") - sys.exit(1) - -fileCLM = os.path.expandvars(sys.argv[1]) -fileCAM = os.path.expandvars(sys.argv[2]) -season = sys.argv[3] -var_y = sys.argv[4] - -print('Starting Terrestrial Coupling Index Calculation for: ',season) - -camDS_CLM45 = xr.open_dataset(fileCAM, decode_times=False) -print('Finished reading in CAM file') -clmDS_CLM45 = xr.open_dataset(fileCLM, decode_times=False) -print('Finished reading in CLM file') - -if season=="DJF": - ss = 0 -elif season=="MAM": - ss = 1 -elif season=="JJA": - ss = 2 -elif season=="SON": - ss = 3 -else: - sys.exit('ERROR : URECOGNIZED SEASON, PLEASE USE DJF, MAM, JJA, OR SON') - -units, reference_date = camDS_CLM45.time.attrs['units'].split('since') -camDS_CLM45['time'] = pd.date_range(start=reference_date, periods=camDS_CLM45.sizes['time'], freq='D') -camDS_time=camDS_CLM45.time[0] -dt_object = datetime.datetime.utcfromtimestamp(camDS_time.values.tolist()/1e9) -vDate=dt_object.strftime("%Y%m%d") - - -units, reference_date = clmDS_CLM45.time.attrs['units'].split('since') -clmDS_CLM45['time'] = pd.date_range(start=reference_date, periods=clmDS_CLM45.sizes['time'], freq='D') - -ds = camDS_CLM45 -ds['SOILWATER_10CM'] = (('time','lat','lon'), clmDS_CLM45.SOILWATER_10CM.values) - -xname = 'SOILWATER_10CM' # Controlling variable -#yname = 'LHFLX' # Responding variable -yname=str(var_y) - -xday = ds[xname].groupby('time.season') -yday = ds[yname].groupby('time.season') - -# Get the covariance of the two (numerator in coupling index) -covarTerm = ((xday - xday.mean()) * (yday - yday.mean())).groupby('time.season').sum() / xday.count() - -# Now compute the coupling index -couplingIndex = covarTerm/xday.std() -ci_season=couplingIndex[ss,:,:] - -ci = np.where(np.isnan(ci_season), -9999, ci_season) - -met_data = ci[:,:] -met_data = met_data[::-1].copy() - -#trim the lat/lon grids so they match the data fields -lat_met = camDS_CLM45.lat -lon_met = camDS_CLM45.lon -print(" Model Data shape: "+repr(met_data.shape)) -v_str = vDate -v_str = v_str + '_000000' -#print(" Valid date: "+v_str) -lat_ll = float(lat_met.min()) -lon_ll = float(lon_met.min()) -n_lat = lat_met.shape[0] -n_lon = lon_met.shape[0] -delta_lat = (float(lat_met.max()) - float(lat_met.min()))/float(n_lat) -delta_lon = (float(lon_met.max()) - float(lon_met.min()))/float(n_lon) - -print(f"variables:" - f"lat_ll: {lat_ll} lon_ll: {lon_ll} n_lat: {n_lat} n_lon: {n_lon} delta_lat: {delta_lat} delta_lon: {delta_lon}") - -attrs = { - 'valid': v_str, - 'init': v_str, - 'lead': "000000", - 'accum': "000000", - 'name': 'TCI', - 'standard_name': 'terrestrial_coupling_index', - 'long_name': 'terrestrial_coupling_index', - 'level': "10cm_soil_depth", - 'units': "W/m2", - - 'grid': { - 'type': "LatLon", - 'name': "CESM Grid", - 'lat_ll': lat_ll, - 'lon_ll': lon_ll, - 'delta_lat': delta_lat, - 'delta_lon': delta_lon, - 'Nlat': n_lat, - 'Nlon': n_lon, - } - } - +import xarray as xr +from tci_func import calc_tci + +# The script expects three arguments: +# sfc_flux_file = Latent Heat Flux file (from CAM, CESM "community atmosphere model") +# sfc_flux_varname = Field name in sfc_flux_file to use when computing TCI +# soil_file = Soil Temperature file (from CLM, CESM "community land model") +# soil_varname = Field name in soil_file to use when computing TCI +# season = "DJF", "MAM", "JJA", or "SON" +if len(sys.argv) < 6: + print("Must specify the following elements: ") + print("sfc_flux_file") + print("sfc_flux_varname") + print("soil_file") + print("soil_varname") + print("season (DJF, MAM, JJA, or SON)") + sys.exit(1) + +# Store command line arguments +fileCAM = os.path.expandvars(sys.argv[1]) +sfc_flux_varname = varCAM = sys.argv[2] +fileCLM = os.path.expandvars(sys.argv[3]) +soil_varname = varCLM = sys.argv[4] +season = sys.argv[5] +if season not in ['DJF','MAM','JJA','SON']: + print("ERROR: UNRECOGNIZED SEASON IN tci_from_cesm.py") + sys.exit(1) + +print("Starting Terrestrial Coupling Index Calculation for: "+season) + +dsCLM = xr.open_dataset(fileCLM, decode_times=False) +dsCAM = xr.open_dataset(fileCAM, decode_times=False) + +# The same shape (dimensions/coordinates) are assumed between the CLM and CAM files +# However, some metadata differences prevent using Xarray functions to combine the fields. +# Thus, we extract the values from one Xarray object and attach it to the other. +dsCAM[soil_varname] = xr.DataArray(dsCLM[soil_varname].values,dims=['time','lat','lon'],coords=dsCAM.coords) +ds = dsCAM + +print("Finished reading CAM and CLM files with Xarray.") + +# Add a Pandas date range to subset by season +time_units, reference_date = ds.time.attrs['units'].split('since') +if not time_units.strip() in ['D','days','Days','DAYS']: + print("ERROR: TIME UNITS EXPECTED TO BE DAYS IN tci_from_cesm.py") + sys.exit(1) +else: + ds['time'] = pd.date_range(start=reference_date, periods=ds.sizes['time'], freq='D') +print(ds) + +# Group the dataset by season and subset to the season the user requested +szn = ds.groupby('time.season')[season] + +# Compute the covariance of the surface flux and soil variables +covarTerm = ((szn[soil_varname]-szn[soil_varname].mean(dim='time')) * \ + (szn[sfc_flux_varname]-szn[sfc_flux_varname].mean(dim='time'))).sum(dim='time') / szn[soil_varname].count(dim='time') + +# Compute the coupling index +couplingIndex = covarTerm/szn[soil_varname].std(dim='time') + +# Prepare for MET +# 1. Replace missing data with the MET missing data values +couplingIndex = xr.where(couplingIndex.isnull(),-9999.,couplingIndex) + +# 2. Pull out the values into a NumPy object +met_data = couplingIndex.values + +# 3. Reverse the ordering of met_data (flip the grid along the equator) +met_data = met_data[::-1] + +# 4. Create a time variable formatted the way MET expects +time_var = ds.time.dt.strftime('%Y%m%d_%H%M%S').values[0] + +# 5. Create grid information. The CESM is on a lat/lon projection. +# lower left corner latitude +lat_ll = ds.lat.min().values +# lower left corner longitude +lon_ll = ds.lon.min().values +# number of latitude +n_lat = ds.sizes['lat'] +# number of longitude +n_lon = ds.sizes['lon'] +# latitude grid spacing +delta_lat = (ds.lat.max().values-lat_ll)/n_lat +# longitude grid spacing +delta_lon = (ds.lon.max().values-lon_ll)/n_lon + +# 6. Create a dictionary for the LatLon grid and required attributes +grid_attrs = {'type': 'LatLon', + 'name': 'CESM Grid', + 'lat_ll': float(lat_ll), + 'lon_ll': float(lon_ll), + 'delta_lat': float(delta_lat), + 'delta_lon': float(delta_lon), + 'Nlat': int(n_lat), + 'Nlon': int(n_lon)} + +# 7. Populate the attributes dictionary for MET +attrs = {'valid': time_var, + 'init': time_var, + 'lead': "000000", + 'accum': "000000", + 'name': 'TCI', + 'standard_name': 'terrestrial_coupling_index', + 'long_name': 'terrestrial_coupling_index', + 'level': '10cm_soil_depth', + 'units': 'W/m2', + 'grid': grid_attrs} + From 6d58b2c42faed91ea4ef79ec5595278f17b7593f Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Mon, 4 Mar 2024 20:28:21 +0000 Subject: [PATCH 02/31] Removes extraneous imports. --- .../fluxnet2015_tci.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index 1a2209aab6..09386a2604 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -4,18 +4,12 @@ User can change the verifying variable """ - -import numpy import sys import os -import glob import numpy as np import xarray as xr -import datetime -from datetime import date, timedelta import pandas as pd - if len(sys.argv) < 2: print("Must specify the following elements: FLUXNET2015_file season") sys.exit(1) From 841f664816b29e9c111725f28eec2883b023e9da Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Mon, 4 Mar 2024 20:28:46 +0000 Subject: [PATCH 03/31] Switches to function call for the coupling index. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py index 47551732dd..cf8f5a481c 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py @@ -66,12 +66,8 @@ # Group the dataset by season and subset to the season the user requested szn = ds.groupby('time.season')[season] -# Compute the covariance of the surface flux and soil variables -covarTerm = ((szn[soil_varname]-szn[soil_varname].mean(dim='time')) * \ - (szn[sfc_flux_varname]-szn[sfc_flux_varname].mean(dim='time'))).sum(dim='time') / szn[soil_varname].count(dim='time') - -# Compute the coupling index -couplingIndex = covarTerm/szn[soil_varname].std(dim='time') +# Use the shared coupling index function to compute the index +couplingIndex = calc_tci(szn[soil_varname],szn[sfc_flux_varname]) # Prepare for MET # 1. Replace missing data with the MET missing data values From edcb4d9a61761e6578b5478f0372fb5a62a4af50 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Mon, 4 Mar 2024 20:30:10 +0000 Subject: [PATCH 04/31] Correct number of args in comment. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py index cf8f5a481c..288cdff60a 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py @@ -16,7 +16,7 @@ import xarray as xr from tci_func import calc_tci -# The script expects three arguments: +# The script expects five arguments: # sfc_flux_file = Latent Heat Flux file (from CAM, CESM "community atmosphere model") # sfc_flux_varname = Field name in sfc_flux_file to use when computing TCI # soil_file = Soil Temperature file (from CLM, CESM "community land model") From cd9c2e28bf21c580f0d5963ee473ae0f76ab52ca Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Tue, 12 Mar 2024 21:26:18 +0000 Subject: [PATCH 05/31] Summation has to have a dimension supplied for the gridded data, but for pandas the only dimension is time (but it is un-named). Therefore the numerator for the covariance term had to be split out between the fcst and obs case. --- .../tci_func.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/tci_func.py diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/tci_func.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/tci_func.py new file mode 100644 index 0000000000..1e25a7b690 --- /dev/null +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/tci_func.py @@ -0,0 +1,32 @@ +from xarray.core.dataarray import DataArray +from pandas.core.series import Series + +#def calc_tci_cesm(soil_data,sfc_flux_data): +def calc_tci(soil_data,sfc_flux_data): + + # For Xarray objects, compute the mean + if isinstance(soil_data,DataArray) and isinstance(sfc_flux_data,DataArray): + soil_mean = soil_data.mean(dim='time') + soil_count = soil_data.count(dim='time') + sfc_flux_mean = sfc_flux_data.mean(dim='time') + soil_std = soil_data.std(dim='time') + print(((soil_data-soil_mean) * (sfc_flux_data-sfc_flux_mean))) + numer = ((soil_data-soil_mean) * (sfc_flux_data-sfc_flux_mean)).sum(dim='time') + # For Pandas objects, compute the mean + elif isinstance(soil_data,Series) and isinstance(sfc_flux_data,Series): + soil_mean = soil_data.mean() + soil_count = soil_data.count() + sfc_flux_mean = sfc_flux_data.mean() + soil_std = soil_data.std() + numer = ((soil_data-soil_mean) * (sfc_flux_data-sfc_flux_mean)).sum() + # No other object types are supported + else: + print("ERROR: Unrecognized Object Type in calc_tci.py") + print(type(soil_data)) + print(type(sfc_flux_data)) + exit(1) + + covarTerm = numer / soil_count + + return covarTerm/soil_std + From c12c1c6ad05546e24f51de854fce0f761482b0c8 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 5 Apr 2024 17:44:32 +0000 Subject: [PATCH 06/31] Adds static station lookup file for use with Python embedding for FLUXNET observations. --- .../fluxnetstations.csv | 268 ++++++++++++++++++ 1 file changed, 268 insertions(+) create mode 100644 parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnetstations.csv diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnetstations.csv b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnetstations.csv new file mode 100644 index 0000000000..656fd7ef0e --- /dev/null +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnetstations.csv @@ -0,0 +1,268 @@ +station,lat,lon +AR-SLu,-33.4648,-66.4598 +AR-Vir,-28.2395,-56.1886 +AT-Neu,47.1167,11.3175 +AU-ASM,-22.2830,133.2490 +AU-Ade,-13.0769,131.1178 +AU-Cpr,-34.0021,140.5891 +AU-Cum,-33.6152,150.7236 +AU-DaP,-14.0633,131.3181 +AU-DaS,-14.1593,131.3881 +AU-Dry,-15.2588,132.3706 +AU-Emr,-23.8587,148.4746 +AU-Fog,-12.5452,131.3072 +AU-GWW,-30.1913,120.6541 +AU-Gin,-31.3764,115.7138 +AU-How,-12.4943,131.1523 +AU-Lox,-34.4704,140.6551 +AU-RDF,-14.5636,132.4776 +AU-Rig,-36.6499,145.5759 +AU-Rob,-17.1175,145.6301 +AU-Stp,-17.1507,133.3502 +AU-TTE,-22.2870,133.6400 +AU-Tum,-35.6566,148.1517 +AU-Wac,-37.4259,145.1878 +AU-Whr,-36.6732,145.0294 +AU-Wom,-37.4222,144.0944 +AU-Ync,-34.9893,146.2907 +BE-Bra,51.3076,4.5198 +BE-Lon,50.5516,4.7462 +BE-Vie,50.3049,5.9981 +BR-Npw,-16.4980,-56.4120 +BR-Sa1,-2.8567,-54.9589 +BR-Sa3,-3.0180,-54.9714 +BW-Gum,-18.9647,22.3711 +BW-Nxr,-19.5481,23.1792 +CA-Gro,48.2167,-82.1556 +CA-Man,55.8796,-98.4808 +CA-NS1,55.8792,-98.4839 +CA-NS2,55.9058,-98.5247 +CA-NS3,55.9117,-98.3822 +CA-NS4,55.9144,-98.3806 +CA-NS5,55.8631,-98.4850 +CA-NS6,55.9167,-98.9644 +CA-NS7,56.6358,-99.9483 +CA-Oas,53.6289,-106.1978 +CA-Obs,53.9872,-105.1178 +CA-Qfo,49.6925,-74.3421 +CA-SCB,61.3089,-121.2984 +CA-SCC,61.3079,-121.2992 +CA-SF1,54.4850,-105.8176 +CA-SF2,54.2539,-105.8775 +CA-SF3,54.0916,-106.0053 +CA-TP1,42.6609,-80.5595 +CA-TP2,42.7744,-80.4588 +CA-TP3,42.7068,-80.3483 +CA-TP4,42.7102,-80.3574 +CA-TPD,42.6353,-80.5577 +CG-Tch,-4.2892,11.6564 +CH-Cha,47.2102,8.4104 +CH-Dav,46.8153,9.8559 +CH-Fru,47.1158,8.5378 +CH-Lae,47.4783,8.3644 +CH-Oe1,47.2858,7.7319 +CH-Oe2,47.2864,7.7337 +CN-Cha,42.4025,128.0958 +CN-Cng,44.5934,123.5092 +CN-Dan,30.4978,91.0664 +CN-Din,23.1733,112.5361 +CN-Du2,42.0467,116.2836 +CN-Du3,42.0551,116.2809 +CN-Ha2,37.6086,101.3269 +CN-HaM,37.3700,101.1800 +CN-Hgu,32.8453,102.5900 +CN-Qia,26.7414,115.0581 +CN-Sw2,41.7902,111.8971 +CZ-BK1,49.5021,18.5369 +CZ-BK2,49.4944,18.5429 +CZ-wet,49.0247,14.7704 +DE-Akm,53.8662,13.6834 +DE-Dgw,53.1514,13.0543 +DE-Geb,51.0997,10.9146 +DE-Gri,50.9500,13.5126 +DE-Hai,51.0792,10.4522 +DE-Hte,54.2103,12.1761 +DE-Kli,50.8931,13.5224 +DE-Lkb,49.0996,13.3047 +DE-Lnf,51.3282,10.3678 +DE-Obe,50.7867,13.7213 +DE-RuR,50.6219,6.3041 +DE-RuS,50.8659,6.4471 +DE-Seh,50.8706,6.4497 +DE-SfN,47.8064,11.3275 +DE-Spw,51.8922,14.0337 +DE-Tha,50.9626,13.5651 +DE-Zrk,53.8759,12.8890 +DK-Eng,55.6905,12.1918 +DK-Fou,56.4842,9.5872 +DK-Sor,55.4859,11.6446 +ES-Amo,36.8336,-2.2523 +ES-LJu,36.9266,-2.7521 +ES-LgS,37.0979,-2.9658 +ES-Ln2,36.9695,-3.4758 +FI-Hyy,61.8474,24.2948 +FI-Jok,60.8986,23.5134 +FI-Let,60.6418,23.9595 +FI-Lom,67.9972,24.2092 +FI-Si2,61.8372,24.1967 +FI-Sii,61.8327,24.1928 +FI-Sod,67.3624,26.6386 +FR-Fon,48.4764,2.7801 +FR-Gri,48.8442,1.9519 +FR-LBr,44.7171,-0.7693 +FR-LGt,47.3229,2.2841 +FR-Pue,43.7413,3.5957 +GF-Guy,5.2788,-52.9249 +GH-Ank,5.2685,-2.6942 +GL-NuF,64.1308,-51.3861 +GL-ZaF,74.4814,-20.5545 +GL-ZaH,74.4733,-20.5503 +HK-MPM,22.4982,114.0292 +ID-Pag,-2.3200,113.9000 +IT-BCi,40.5237,14.9574 +IT-CA1,42.3804,12.0266 +IT-CA2,42.3772,12.0260 +IT-CA3,42.3800,12.0222 +IT-Cas,45.0700,8.7175 +IT-Col,41.8494,13.5881 +IT-Cp2,41.7043,12.3573 +IT-Cpz,41.7052,12.3761 +IT-Isp,45.8126,8.6336 +IT-La2,45.9542,11.2853 +IT-Lav,45.9562,11.2813 +IT-MBo,46.0147,11.0458 +IT-Noe,40.6062,8.1517 +IT-PT1,45.2009,9.0610 +IT-Ren,46.5869,11.4337 +IT-Ro1,42.4081,11.9300 +IT-Ro2,42.3903,11.9209 +IT-SR2,43.7320,10.2909 +IT-SRo,43.7279,10.2844 +IT-Tor,45.8444,7.5781 +JP-BBY,43.3230,141.8107 +JP-MBF,44.3869,142.3186 +JP-Mse,36.0539,140.0269 +JP-SMF,35.2617,137.0788 +JP-SwL,36.0466,138.1084 +KR-CRK,38.2013,127.2506 +MY-MLM,1.4536,111.1495 +MY-PSO,2.9730,102.3062 +NL-Hor,52.2403,5.0713 +NL-Loo,52.1666,5.7436 +NZ-Kop,-37.3879,175.5539 +PA-SPn,9.3181,-79.6346 +PA-SPs,9.3138,-79.6314 +PH-RiF,14.1412,121.2653 +RU-Ch2,68.6169,161.3509 +RU-Che,68.6130,161.3414 +RU-Cok,70.8291,147.4943 +RU-Fy2,56.4476,32.9019 +RU-Fyo,56.4615,32.9221 +RU-Ha1,54.7252,90.0022 +RU-Sam,72.3738,126.4958 +RU-SkP,62.2550,129.1680 +RU-Tks,71.5943,128.8878 +RU-Vrk,67.0547,62.9405 +SD-Dem,13.2829,30.4783 +SE-Deg,64.1820,19.5565 +SE-St1,68.3541,19.0503 +SJ-Adv,78.1860,15.9230 +SJ-Blv,78.9216,11.8311 +SN-Dhr,15.4028,-15.4322 +UK-LBT,51.5215,-0.1389 +US-A03,70.4953,-149.8823 +US-A10,71.3242,-156.6149 +US-AR1,36.4267,-99.4200 +US-AR2,36.6358,-99.5975 +US-ARM,36.6058,-97.4888 +US-ARb,35.5497,-98.0402 +US-ARc,35.5465,-98.0400 +US-Atq,70.4696,-157.4089 +US-BZB,64.6955,-148.3208 +US-BZF,64.7013,-148.3121 +US-BZS,64.6963,-148.3235 +US-Beo,71.2810,-156.6123 +US-Bes,71.2809,-156.5965 +US-Bi1,38.0992,-121.4993 +US-Bi2,38.1091,-121.5351 +US-Blo,38.8953,-120.6328 +US-CRT,41.6285,-83.3471 +US-Cop,38.0900,-109.3900 +US-DPW,28.0521,-81.4361 +US-EDN,37.6156,-122.1140 +US-EML,63.8784,-149.2536 +US-GBT,41.3658,-106.2397 +US-GLE,41.3665,-106.2399 +US-Goo,34.2547,-89.8735 +US-HRA,34.5852,-91.7517 +US-HRC,34.5888,-91.7517 +US-Ha1,42.5378,-72.1715 +US-Ho1,45.2041,-68.7402 +US-IB2,41.8406,-88.2410 +US-ICs,68.6058,-149.3110 +US-Ivo,68.4865,-155.7503 +US-KS1,28.4583,-80.6709 +US-KS2,28.6086,-80.6715 +US-LA1,29.5013,-90.4449 +US-LA2,29.8587,-90.2869 +US-LWW,34.9604,-97.9789 +US-Lin,36.3566,-119.0922 +US-Los,46.0827,-89.9792 +US-MAC,27.1632,-81.1873 +US-MMS,39.3232,-86.4131 +US-MRM,40.8164,-74.0435 +US-Me1,44.5794,-121.5000 +US-Me2,44.4526,-121.5589 +US-Me3,44.3154,-121.6078 +US-Me4,44.4992,-121.6224 +US-Me5,44.4372,-121.5668 +US-Me6,44.3233,-121.6078 +US-Myb,38.0499,-121.7650 +US-NC4,35.7879,-75.9038 +US-NGB,71.2800,-156.6092 +US-NGC,64.8618,-163.7002 +US-NR1,40.0329,-105.5464 +US-Ne1,41.1651,-96.4766 +US-Ne2,41.1649,-96.4701 +US-Ne3,41.1797,-96.4397 +US-ORv,40.0201,-83.0183 +US-OWC,41.3795,-82.5125 +US-Oho,41.5545,-83.8438 +US-PFa,45.9459,-90.2723 +US-Prr,65.1237,-147.4876 +US-SRC,31.9083,-110.8395 +US-SRG,31.7894,-110.8277 +US-SRM,31.8214,-110.8661 +US-Snd,38.0366,-121.7540 +US-Sne,38.0369,-121.7547 +US-Srr,38.2006,-122.0264 +US-StJ,39.0882,-75.4372 +US-Sta,41.3966,-106.8024 +US-Syv,46.2420,-89.3477 +US-Ton,38.4309,-120.9660 +US-Tw1,38.1074,-121.6469 +US-Tw2,38.0969,-121.6365 +US-Tw3,38.1152,-121.6469 +US-Tw4,38.1027,-121.6413 +US-Tw5,38.1072,-121.6426 +US-Twt,38.1087,-121.6531 +US-UMB,45.5598,-84.7138 +US-UMd,45.5625,-84.6975 +US-Uaf,64.8663,-147.8555 +US-Var,38.4133,-120.9508 +US-WCr,45.8059,-90.0799 +US-WPT,41.4646,-82.9962 +US-Whs,31.7438,-110.0522 +US-Wi0,46.6188,-91.0814 +US-Wi1,46.7305,-91.2329 +US-Wi2,46.6869,-91.1528 +US-Wi3,46.6347,-91.0987 +US-Wi4,46.7393,-91.1663 +US-Wi5,46.6531,-91.0858 +US-Wi6,46.6249,-91.2982 +US-Wi7,46.6491,-91.0693 +US-Wi8,46.7223,-91.2524 +US-Wi9,46.7385,-91.0746 +US-Wkg,31.7365,-109.9419 +ZA-Kru,-25.0197,31.4969 +ZM-Mon,-15.4391,23.2525 From 8b94a647bb355d164a6a33ccff07e69c81c83464 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 5 Apr 2024 17:45:26 +0000 Subject: [PATCH 07/31] Major overhaul to forecast Python embedding script for the TCI use case. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py index 288cdff60a..0c568ae538 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py @@ -9,12 +9,11 @@ Modified Nov 2023 """ - +import metcalcpy.diagnostics.land_surface as land_sfc import os import pandas as pd import sys import xarray as xr -from tci_func import calc_tci # The script expects five arguments: # sfc_flux_file = Latent Heat Flux file (from CAM, CESM "community atmosphere model") @@ -42,7 +41,6 @@ sys.exit(1) print("Starting Terrestrial Coupling Index Calculation for: "+season) - dsCLM = xr.open_dataset(fileCLM, decode_times=False) dsCAM = xr.open_dataset(fileCAM, decode_times=False) @@ -51,7 +49,6 @@ # Thus, we extract the values from one Xarray object and attach it to the other. dsCAM[soil_varname] = xr.DataArray(dsCLM[soil_varname].values,dims=['time','lat','lon'],coords=dsCAM.coords) ds = dsCAM - print("Finished reading CAM and CLM files with Xarray.") # Add a Pandas date range to subset by season @@ -61,13 +58,12 @@ sys.exit(1) else: ds['time'] = pd.date_range(start=reference_date, periods=ds.sizes['time'], freq='D') -print(ds) # Group the dataset by season and subset to the season the user requested szn = ds.groupby('time.season')[season] # Use the shared coupling index function to compute the index -couplingIndex = calc_tci(szn[soil_varname],szn[sfc_flux_varname]) +couplingIndex = land_sfc.calc_tci(szn[soil_varname],szn[sfc_flux_varname]) # Prepare for MET # 1. Replace missing data with the MET missing data values @@ -117,4 +113,4 @@ 'level': '10cm_soil_depth', 'units': 'W/m2', 'grid': grid_attrs} - + From 7d65a3c6a44a7cb62045b14c03b3d346e6f925cd Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 5 Apr 2024 17:46:06 +0000 Subject: [PATCH 08/31] Major overhaul to observation Python embedding script for the TCI use case, to compute TCI from raw observations rather than read pre-computed TCI. --- .../fluxnet2015_tci.py | 216 +++++++++++++++++- 1 file changed, 210 insertions(+), 6 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index 09386a2604..c64f7b3db3 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -4,15 +4,219 @@ User can change the verifying variable """ -import sys -import os +import datetime +import glob +import metcalcpy.diagnostics.land_surface as land_sfc import numpy as np -import xarray as xr import pandas as pd +import os +import sys +import xarray as xr + +# For the finer resolution data, what percentage at the finer resoultion should pass for the daily data? +DAILY_QC_THRESH=0.8 +# TODO: This needs to be evaluated PER season, not on all days in all DJF's, for example +MIN_DAYS_SEASON=80 +MIN_DAYS_SITE=450 + +def get_season_start_end(s,refdate): + + if s=='DJF': + if refdate.strftime('%m') in ['01','02']: + start = datetime.datetime((refdate.year)-1,12,1,0,0,0) + end = datetime.datetime(refdate.year,3,1,0,0,0) + else: + start = datetime.datetime(refdate.year,12,1,0,0,0) + end = datetime.datetime((refdate.year)+1,3,1,0,0,0) + elif season=='MAM': + start = datetime.datetime(refdate.year,3,1,0,0,0) + end = datetime.datetime(refdate.year,6,1,0,0,0) + elif season=='JJA': + start = datetime.datetime(refdate.year,6,1,0,0,0) + end = datetime.datetime(refdate.year,9,1,0,0,0) + elif season=='SON': + start = datetime.datetime(refdate.year,9,1,0,0,0) + end = datetime.datetime(refdate.year,12,1,0,0,0) + + return start, end + +# The script expects four arguments: +# raw_fluxnet_dir = Full path to the directory where the raw FLUXNET files are stored +# sfc_flux_varname = Field name in the raw_fluxnet_file to use when computing TCI +# soil_varname = Field name in the raw_fluxnet_file to use when computing TCI +# season = "DJF", "MAM", "JAJ", or "SON +if len(sys.argv) < 5: + print("Must specify the following elements: ") + print("raw_fluxnet_dir") + print("sfc_flux_varname") + print("soil_varname") + print("season (DJF, MAM, JJA, or SON)") + sys.exit(1) + +# Store command line arguments +fndir = os.path.expandvars(sys.argv[1]) +sfc_flux_varname = varCAM = sys.argv[2] +sfc_qc = sfc_flux_varname+'_QC' +soil_varname = varCLM = sys.argv[3] +soil_qc = soil_varname+'_QC' +season = sys.argv[4] +if season not in ['DJF','MAM','JJA','SON']: + print("ERROR: UNRECOGNIZED SEASON IN tci_from_cesm.py") + sys.exit(1) + +# Dictionary mapping of which months go with which seasons +smap = {'DJF':[12,1,2],'MAM':[3,4,5],'JJA':[6,7,8],'SON':[9,10,11]} + +# Read station information from static file, because the raw FLUXNET data does not contain +# required metadata like station latitude/longitude that is required by MET. +sd = pd.read_csv('fluxnetstations.csv') + +print("Starting Terrestrial Coupling Index Calculation for: "+season) +# Locate files at the input directory +if not os.path.exists(fndir): + print("ERROR: FLUXNET INPUT DIRECTORY DOES NOT EXIST.") + sys.exit(1) +else: + fn_file_list = glob.glob(os.path.join(fndir,'FLX_*_DD_*.csv')) + fn_stations = [os.path.basename(x).split('_')[1] for x in fn_file_list] + print("FOUND FLUXNET FILES FOR STATIONS:") + [print(x) for x in fn_stations] + +# Loop over all stations we have data for and ensure we have the required metadata +keep_stations = [] +for s in fn_stations: + if not sd['station'].astype('str').str.contains(s).any(): + print("WARNING! EXCLUDING SITE %s, NO METADATA FOUND IN fluxnetstations.csv" % (s)) + else: + keep_stations.append(s) +fn_stations = keep_stations + +# Create a MET dataframe +metdf = pd.DataFrame(columns=['typ','sid','vld','lat','lon','elv','var','lvl','hgt','qc','obs']) +metdf['sid'] = fn_stations +metdf['typ'] = ['ADPSFC']*len(metdf) +metdf['elv'] = [10]*len(metdf) +metdf['lvl'] = [10]*len(metdf) +metdf['var'] = ['TCI']*len(metdf) +metdf['qc'] = ['NA']*len(metdf) +metdf['hgt'] = [0]*len(metdf) +metdf['lat'] = [sd[sd['station']==s]['lat'].values[0] for s in fn_stations] +metdf['lon'] = [sd[sd['station']==s]['lon'].values[0] for s in fn_stations] + +# TODO: DEBUG +print(metdf) + +# Open each of the fluxnet files as a pandas dataframe +# HH files have different time columns than DD files +#dflist = [pd.read_csv(x,header=0,usecols=['TIMESTAMP_START','TIMESTAMP_END',sfc_flux_varname,soil_varname]) for x in fn_file_list] +dflist = [pd.read_csv(x,usecols=['TIMESTAMP',sfc_flux_varname,sfc_qc,soil_varname,soil_qc]) for x in fn_file_list] + +# Because the time record for each station is not the same, the dataframes cannot be merged. +# Given the goal is a single value for each site for all dates that fall in a season, +# the dataframes can remain separate. + +sample_df = [] + +for df,stn in tuple(zip(dflist,fn_stations)): + + print("PROCESSING STATION: %s" % (stn)) + + # Length of all data + alldays = len(df) + + # Only save data with quality above the threshold and reset the index + df = df[(df[sfc_qc].astype('float')>=DAILY_QC_THRESH)&(df[soil_qc].astype('float')>=DAILY_QC_THRESH)].reset_index() + + print("INFO: DISCARDED %04d DAYS OF LOW QUALITY DATA FOR ALL SEASONS AT %s" % (int(alldays)-int(len(df)),stn)) + + # Add datetime column + df['datetime'] = pd.to_datetime(df['TIMESTAMP'],format='%Y%m%d') + + # Add a month column + df['month'] = df['datetime'].dt.strftime('%m') + + # Add a season column + df['season'] = [szn for mon in df['month'] for szn in smap if int(mon) in smap[szn]] + + # Add a station column + df['station_id'] = [stn]*len(df) + + # Subset by the requested season + df = df[df['season']==season] + + # Get the start and end of the season of the first year + start, end = get_season_start_end(season,df['datetime'].iloc[0]) + + # We know the start and end date of the first season. We assume there is data in every season forward + # in time until we exceed the last date in the file. + badyrs = [] + limit = df['datetime'].iloc[-1] + while start <= limit: + year = end.strftime('%Y') + ndays = len(df[(df['datetime']>=start) & (df['datetime']<=(end-datetime.timedelta(days=1)))]) + print("FOR "+season+" ENDING %s FOUND %04d DAYS." % (year,ndays)) + + # Store the season ending years where there are not enough days + if ndays < MIN_DAYS_SEASON: + badyrs.append(year) + + # Move to the same season in the next consecutive year + start = datetime.datetime((start.year)+1,start.month,start.day,0,0,0) + end = datetime.datetime((end.year)+1,end.month,end.day,0,0,0) + + # Now actually remove the data we don't want to use + for year in badyrs: + start, end = get_season_start_end(season,datetime.datetime(int(year),1,1,0,0,0)) + print("REMOVING "+season+" ENDING %s" % (year)) + df = df[(df['datetime'](end-datetime.timedelta(days=1)))] + + # TODO: Double check each season has at least 80 days? + print("INFO: USING %04d DAYS OF DATA AT %s FOR %s" % (int(len(df)),stn,season)) + + # Double check there are sufficient days at this site for all seasons + if len(df) Date: Fri, 5 Apr 2024 19:01:46 +0000 Subject: [PATCH 09/31] Updates documentation file for TCI use case. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI.py | 52 ++++++++++++++----- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index 8fb804771c..ef79622a54 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -8,47 +8,68 @@ ############################################################################## # Scientific Objective # -------------------- -# This use case ingests two CESM (CAM and CLM) files and a new FLUX2015 dataset (NETCDF) that Paul Dirmeyer, GMU prepared from FLUXNET2015 ASCII dataset. -# The use case will calculate Terrestrial Coupling Index (TCI) from CESM datasets. -# Utilizing Python embedding, this use case taps into a new vital observation dataset and compares it to CESM simulations TCI forecast. -# Finally, it will generate plots of model TCI and observed TCI. +# This use case ingests two CESM (CAM and CLM) files and raw FLUXNET2015 data. +# The use case calculates the Terrestrial Coupling Index (TCI) from the CESM forecasts and FLUXNET observations. +# Utilizing Python embedding, this use case taps into a new vital observation dataset and compares it to CESM forecasts of TCI. +# Finally, it will generate plots of model forecast TCI overlaid with TCI observations at FLUXNET sites.. ############################################################################## # Datasets # --------------------- # -# | **Forecast:** CESM 1979-1983 Simulations a. Community Land Model (CLM) and b. Community Atmosphere Model (CAM) -# -# | **Observations:** FLUXNET2015 post processed and converted to NETCDF. This data includes data collected from multiple regional flux towers. +# | **Forecast:** CESM 1979-1983 Simulations +# | * Community Land Model (CLM) file +# | * Community Atmosphere Model (CAM) file # +# | **Observations:** Raw FLUXNET2015 observations # # | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases # | This tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. # -# | **Data Source:** CESM - CGD; FLUXNET2015 - Paul Dirmeyer +# | **Data Source:** CESM - NSF NCAR Climate & Global Dynamics (CGD); FLUXNET2015 "SUBSET" Data Product: https://fluxnet.org/data/fluxnet2015-dataset/subset-data-product/ + +############################################################################## +# Python Dependencies +# --------------------- +# +# This use case requires the following Python dependencies:: +# +# * Xarray +# * Pandas +# * METcalcpy +# ############################################################################## # METplus Components # ------------------ # # This use case utilizes the METplus PyEmbedIngest to read the CESM files and calculate TCI using python embedding and a NETCDF file of the TCI is generated. -# The METplus PointStat processes the output of PyEmbedIngest and FLUXNET2015 dataset (using python embedding), and outputs the requested line types. -# Then METplus PlotPointObs tool reads the output of PyEmbedIngest and FLUXNET2015 dataset and produce plots of TCI from CESM and point observations. +# The METplus PointStat processes the output of PyEmbedIngest and FLUXNET2015 dataset (using Python embedding), and outputs the requested line types. +# Then the METplus PlotPointObs tool reads the output of PyEmbedIngest and FLUXNET2015 dataset and produce plots of TCI from CESM and point observations. # A custom loop runs through all the pre-defined seasons (DJF, MAM, JJA, SON) and runs PyEmbedIngest, PointStat, and PlotPointObs. ############################################################################## # METplus Workflow # ---------------- # -# The PyEmbedIngest tool reads 2 CESM files containing Soil Moisture and Sensible Heat Flux, each composed of daily forecasts from -# 1979 to 1983 and calculates TCI and generates a NETCDF file of the TCI. One FLUXNET2015 NETCDF file containing station observations -# of several variables including Coupling Index of Soil Moisture and Sensible Heat Flux is read by Python Embedding. +# The PyEmbedIngest tool reads 2 CESM files containing Soil Moisture (CLM file) and Sensible Heat Flux (CAM file), each composed of daily forecasts from +# 1979 to 1983 and calculates TCI and generates a NETCDF file of the TCI. Raw CSV files containing FLUXNET station observations of latent heat flux (LE_F_MDS) +# and soil water content at the shallowest level (SWC_F_MDS_1) are read using Python embedding, and TCI is computed. # # | **Valid Beg:** 1979-01-01 at 00z # | **Valid End:** 1979-01-01 at 00z # # PointStat is used to compare the two new fields (TCI calculated from CESM dataset and FLUXNET2015). -# Finally, PlotPointObs is run to plot the CESM TCI overlaying the FLUXNET2015 point observations. +# Finally, PlotPointObs is run to plot the CESM TCI overlaying the FLUXNET2015 point observations. +# +# .. note:: +# +# The CESM forecasts of TCI cover a time period prior to the availability of FLUXNET observations. Thus, +# this use case should be a demonstration of the capability to read CESM forecast data, raw FLUXNET observation data, +# and compute TCI rather than be used for scientific purposes. The use case is designed to enforce seasonal alignment, +# but it is not designed to enforce date/time alignment. In this casee, the CESM data cover 1979-1983, whereas the sample +# FLUXNET observations cover varying time ranges depending on the site. +# ############################################################################## # METplus Configuration @@ -88,6 +109,9 @@ # .. highlight:: python # .. literalinclude:: ../../../../parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py # +# Both of the above Python embedding scripts compute TCI using the `calc_tci()` function in METcalcpy. See the METcalcpy +# documentation for more information. +# ############################################################################## # Running METplus From 29c7ad73b5fc20aacbc02bb02cf2f0412b5e5cfa Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 5 Apr 2024 21:45:46 +0000 Subject: [PATCH 10/31] Adds METcalcpy version number. --- .../land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index ef79622a54..8103fd5a49 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -34,9 +34,9 @@ # # This use case requires the following Python dependencies:: # -# * Xarray -# * Pandas -# * METcalcpy +# Xarray +# Pandas +# METcalcpy 3.0.0+ # ############################################################################## From 48799557960ec558173653d62b59e66cd8470ed0 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 5 Apr 2024 21:50:37 +0000 Subject: [PATCH 11/31] Refactors wording and fixes typo. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index 8103fd5a49..093c09a6d2 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -64,11 +64,11 @@ # # .. note:: # -# The CESM forecasts of TCI cover a time period prior to the availability of FLUXNET observations. Thus, -# this use case should be a demonstration of the capability to read CESM forecast data, raw FLUXNET observation data, -# and compute TCI rather than be used for scientific purposes. The use case is designed to enforce seasonal alignment, -# but it is not designed to enforce date/time alignment. In this casee, the CESM data cover 1979-1983, whereas the sample -# FLUXNET observations cover varying time ranges depending on the site. +# The CESM forecasts cover a time period prior to the availability of FLUXNET observations. Thus, +# this use case should be considered a demonstration of the capability to read CESM forecast data, +# raw FLUXNET observation data, and compute TCI, rather than a bonafide scientific application. +# The use case is designed to enforce seasonal alignment, but it is not designed to enforce date/time alignment. +# In this case, the CESM data cover 1979-1983, whereas the sample FLUXNET observations cover varying time ranges depending on the site. # ############################################################################## From 91faa5906eadd98ba587dd8f889c7dd634a15555 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 5 Apr 2024 21:53:09 +0000 Subject: [PATCH 12/31] Fixes RST formatting. --- .../land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index 093c09a6d2..df54201739 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -55,7 +55,7 @@ # The PyEmbedIngest tool reads 2 CESM files containing Soil Moisture (CLM file) and Sensible Heat Flux (CAM file), each composed of daily forecasts from # 1979 to 1983 and calculates TCI and generates a NETCDF file of the TCI. Raw CSV files containing FLUXNET station observations of latent heat flux (LE_F_MDS) # and soil water content at the shallowest level (SWC_F_MDS_1) are read using Python embedding, and TCI is computed. -# +# # | **Valid Beg:** 1979-01-01 at 00z # | **Valid End:** 1979-01-01 at 00z # From 0cf8f28f04712118102d40b03da2a9a32feb8bfd Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 5 Apr 2024 22:03:28 +0000 Subject: [PATCH 13/31] Finally fixed RST error. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index df54201739..6ae0ba9f86 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -27,6 +27,7 @@ # | This tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. # # | **Data Source:** CESM - NSF NCAR Climate & Global Dynamics (CGD); FLUXNET2015 "SUBSET" Data Product: https://fluxnet.org/data/fluxnet2015-dataset/subset-data-product/ +# ############################################################################## # Python Dependencies @@ -34,9 +35,9 @@ # # This use case requires the following Python dependencies:: # -# Xarray -# Pandas -# METcalcpy 3.0.0+ +# * Xarray +# * Pandas +# * METcalcpy 3.0.0+ # ############################################################################## @@ -47,6 +48,7 @@ # The METplus PointStat processes the output of PyEmbedIngest and FLUXNET2015 dataset (using Python embedding), and outputs the requested line types. # Then the METplus PlotPointObs tool reads the output of PyEmbedIngest and FLUXNET2015 dataset and produce plots of TCI from CESM and point observations. # A custom loop runs through all the pre-defined seasons (DJF, MAM, JJA, SON) and runs PyEmbedIngest, PointStat, and PlotPointObs. +# ############################################################################## # METplus Workflow @@ -58,7 +60,7 @@ # # | **Valid Beg:** 1979-01-01 at 00z # | **Valid End:** 1979-01-01 at 00z -# +# # PointStat is used to compare the two new fields (TCI calculated from CESM dataset and FLUXNET2015). # Finally, PlotPointObs is run to plot the CESM TCI overlaying the FLUXNET2015 point observations. # From 7cc809b1bdec7fffbf3d3a2389acbd60285e1193 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 5 Apr 2024 22:08:29 +0000 Subject: [PATCH 14/31] Adds support to remove leap days if requested. --- .../fluxnet2015_tci.py | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index c64f7b3db3..d8e761e37b 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -13,12 +13,21 @@ import sys import xarray as xr +# Climate model data typically doesn't include leap days, so it is excluded from observations by default +SKIP_LEAP=True + # For the finer resolution data, what percentage at the finer resoultion should pass for the daily data? DAILY_QC_THRESH=0.8 -# TODO: This needs to be evaluated PER season, not on all days in all DJF's, for example + +# TODO: This needs to be evaluated PER season (i.e. DJF for 2018, DJF for 2019 ...) not on all days in all DJF's, for example MIN_DAYS_SEASON=80 + +# For all seasons in the analysis period, how many total days per site? MIN_DAYS_SITE=450 +# Pattern to use for searching for fluxnet files +FILENAME_PATTERN='AMF_*_DD_*.csv' + def get_season_start_end(s,refdate): if s=='DJF': @@ -77,10 +86,14 @@ def get_season_start_end(s,refdate): print("ERROR: FLUXNET INPUT DIRECTORY DOES NOT EXIST.") sys.exit(1) else: - fn_file_list = glob.glob(os.path.join(fndir,'FLX_*_DD_*.csv')) + fn_file_list = glob.glob(os.path.join(fndir,FILENAME_PATTERN)) fn_stations = [os.path.basename(x).split('_')[1] for x in fn_file_list] - print("FOUND FLUXNET FILES FOR STATIONS:") - [print(x) for x in fn_stations] + if fn_stations == []: + print("ERROR! NO FLUXNET DATA FOUND MATCHING FILE PATTERN "+FILENAME_PATTERN) + sys.exit(1) + else: + print("FOUND FLUXNET FILES FOR STATIONS:") + [print(x) for x in fn_stations] # Loop over all stations we have data for and ensure we have the required metadata keep_stations = [] @@ -103,20 +116,23 @@ def get_season_start_end(s,refdate): metdf['lat'] = [sd[sd['station']==s]['lat'].values[0] for s in fn_stations] metdf['lon'] = [sd[sd['station']==s]['lon'].values[0] for s in fn_stations] +# Check and see what the length of metdf is here. If it is empty/zero, no FLUXNET data were found. +if len(metdf)==0: + print("ERROR! FOUND NO FLUXNET DATA FOR FILENAME_PATTERN AND METADATA PROVIDED. "+\ + "PLEASE RECONFIGURE AND TRY AGAIN.") + sys.exit(1) + # TODO: DEBUG print(metdf) -# Open each of the fluxnet files as a pandas dataframe -# HH files have different time columns than DD files -#dflist = [pd.read_csv(x,header=0,usecols=['TIMESTAMP_START','TIMESTAMP_END',sfc_flux_varname,soil_varname]) for x in fn_file_list] +# Open each of the fluxnet files as a pandas dataframe. This will only read the TIMESTAMP and data variable columns. +# This assumes "DD" files (daily) are used. dflist = [pd.read_csv(x,usecols=['TIMESTAMP',sfc_flux_varname,sfc_qc,soil_varname,soil_qc]) for x in fn_file_list] # Because the time record for each station is not the same, the dataframes cannot be merged. # Given the goal is a single value for each site for all dates that fall in a season, # the dataframes can remain separate. -sample_df = [] - for df,stn in tuple(zip(dflist,fn_stations)): print("PROCESSING STATION: %s" % (stn)) @@ -144,6 +160,10 @@ def get_season_start_end(s,refdate): # Subset by the requested season df = df[df['season']==season] + # If the season is DJF, remove leap days from February if requested + if season=='DJF' and SKIP_LEAP: + df = df[~((df.datetime.dt.month == 2) & (df.datetime.dt.day == 29))] + # Get the start and end of the season of the first year start, end = get_season_start_end(season,df['datetime'].iloc[0]) @@ -184,17 +204,11 @@ def get_season_start_end(s,refdate): # value for each day in the season, how do we get the seasonal value? Or should we average before # computingTCI? #df['tci'] = calc_tci(df[soil_varname],df[sfc_flux_varname]) - sample_df.append(df) metdf.loc[metdf['sid']==stn,'obs'] = land_sfc.calc_tci(df[soil_varname],df[sfc_flux_varname]) # Set the valid time as the first time in the record for this site metdf.loc[metdf['sid']==stn,'vld'] = pd.to_datetime(df['TIMESTAMP'].iloc[0],format='%Y%m%d').strftime('%Y%m%d_%H%M%S') -print(sample_df) -print(pd.concat(sample_df)) -pd.concat(sample_df).reset_index().to_csv('calc_tci_jja_pandas_input.csv',index=False) -exit() - # At this point, 'qc' should be '-9999' anywhere we discarded a site due to insufficient data above. # Remove these here. metdf = metdf[metdf['qc']=='NA'] @@ -202,13 +216,6 @@ def get_season_start_end(s,refdate): print(metdf) exit() -# Add a -print(dflist) - -[print(pd.to_datetime(df['TIMESTAMP']).dt.season) for df in dflist] -[print(df[soil_varname].max()) for df in dflist] -exit() - # TODO: # 1. Glob input directory for FLUXNET2015 CSV files (one file per site) # 2. Determine site name from filename- what other metadata is in the file? From c5228f61e34b5e6be747d01b329d1da8ac187818 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 5 Apr 2024 22:29:09 +0000 Subject: [PATCH 15/31] Updates command line args for Python embedding scripts. --- .../land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf index 4fe0905888..f19b4df9e8 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf @@ -52,7 +52,7 @@ PY_EMBED_INGEST_1_OUTPUT_TEMPLATE = {OUTPUT_BASE}/regrid_data_plane_{custom}.nc # User can change it to use any other variable present in CESM files. ############################################################ -PY_EMBED_INGEST_1_SCRIPT = {PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.clm2.h1.1979-83_SoilWater10cm.nc {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.cam.h1.1979-83_CIvars.nc {custom} LHFLX +PY_EMBED_INGEST_1_SCRIPT = {PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.cam.h1.1979-83_CIvars.nc LHFLX {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.clm2.h1.1979-83_SoilWater10cm.nc SOILWATER_10CM {custom} PY_EMBED_INGEST_1_TYPE = NUMPY #PY_EMBED_INGEST_1_OUTPUT_GRID = G129 @@ -71,7 +71,7 @@ FCST_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY # fluxnet_tci.py in the PointStat_fcstCESM_obsFLUXNET2015_TCI # directory. ############################################################ -OBS_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/F2015_LoCo_AllChains_F2015.nc4 {custom} +OBS_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/F2015_LoCo_AllChains_F2015.nc4 LE_F_MDS SWC_F_MDS_1 {custom} POINT_STAT_OUTPUT_DIR = {OUTPUT_BASE}/PointStat From 7a9fd24b8eec9c386fea92274d4cb54589cbb91c Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Tue, 9 Apr 2024 22:29:47 +0000 Subject: [PATCH 16/31] Updated conf file for use case. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI.conf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf index f19b4df9e8..f9059a7aef 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf @@ -71,7 +71,8 @@ FCST_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY # fluxnet_tci.py in the PointStat_fcstCESM_obsFLUXNET2015_TCI # directory. ############################################################ -OBS_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/F2015_LoCo_AllChains_F2015.nc4 LE_F_MDS SWC_F_MDS_1 {custom} +FLUXNET_OBS_METADATA_FILE={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnetstations.csv +OBS_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py /home/dadriaan/projects/landsfc/data/feature_2388_Update_TCI/obs/fluxnet LE_F_MDS SWC_F_MDS_1 {custom} {FLUXNET_OBS_METADATA_FILE} POINT_STAT_OUTPUT_DIR = {OUTPUT_BASE}/PointStat @@ -88,7 +89,7 @@ FCST_POINT_STAT_VAR1_LEVELS = OBS_POINT_STAT_VAR1_NAME = TCI OBS_POINT_STAT_VAR1_LEVELS = L0 -FCST_POINT_STAT_VAR1_NAME = TCI_10cm_soil_depth +FCST_POINT_STAT_VAR1_NAME = TCI_10cm_soil_depth FCST_POINT_STAT_VAR1_LEVELS = Z10 BOTH_POINT_STAT_VAR1_THRESH = @@ -138,7 +139,7 @@ POINT_STAT_OUTPUT_PREFIX = {custom} # https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#plotpointobs ############################################################ -PLOT_POINT_OBS_INPUT_TEMPLATE =PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/F2015_LoCo_AllChains_F2015.nc4 {custom} +PLOT_POINT_OBS_INPUT_TEMPLATE =PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py /home/dadriaan/projects/landsfc/data/feature_2388_Update_TCI/obs/fluxnet LE_F_MDS SWC_F_MDS_1 {custom} {FLUXNET_OBS_METADATA_FILE} PLOT_POINT_OBS_GRID_INPUT_DIR = {OUTPUT_BASE}/ PLOT_POINT_OBS_GRID_INPUT_TEMPLATE = regrid_data_plane_{custom}.nc @@ -169,4 +170,3 @@ PLOT_POINT_OBS_POINT_DATA = colorbar_flag = FALSE; } } - From e0642ba6919f4c5492153074f8b42b5403d1ba9b Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Tue, 9 Apr 2024 22:30:12 +0000 Subject: [PATCH 17/31] Removes new TCI function because it is in METcalcpy now. --- .../tci_func.py | 32 ------------------- 1 file changed, 32 deletions(-) delete mode 100644 parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/tci_func.py diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/tci_func.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/tci_func.py deleted file mode 100644 index 1e25a7b690..0000000000 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/tci_func.py +++ /dev/null @@ -1,32 +0,0 @@ -from xarray.core.dataarray import DataArray -from pandas.core.series import Series - -#def calc_tci_cesm(soil_data,sfc_flux_data): -def calc_tci(soil_data,sfc_flux_data): - - # For Xarray objects, compute the mean - if isinstance(soil_data,DataArray) and isinstance(sfc_flux_data,DataArray): - soil_mean = soil_data.mean(dim='time') - soil_count = soil_data.count(dim='time') - sfc_flux_mean = sfc_flux_data.mean(dim='time') - soil_std = soil_data.std(dim='time') - print(((soil_data-soil_mean) * (sfc_flux_data-sfc_flux_mean))) - numer = ((soil_data-soil_mean) * (sfc_flux_data-sfc_flux_mean)).sum(dim='time') - # For Pandas objects, compute the mean - elif isinstance(soil_data,Series) and isinstance(sfc_flux_data,Series): - soil_mean = soil_data.mean() - soil_count = soil_data.count() - sfc_flux_mean = sfc_flux_data.mean() - soil_std = soil_data.std() - numer = ((soil_data-soil_mean) * (sfc_flux_data-sfc_flux_mean)).sum() - # No other object types are supported - else: - print("ERROR: Unrecognized Object Type in calc_tci.py") - print(type(soil_data)) - print(type(sfc_flux_data)) - exit(1) - - covarTerm = numer / soil_count - - return covarTerm/soil_std - From 7ead6f0310a8a026af168d82db71bd97bc3be0a7 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Tue, 9 Apr 2024 22:31:19 +0000 Subject: [PATCH 18/31] Removes old code, somsome reorganization and clarification and setting of params, and also switches the fluxnet metadata file to a command line argument instead of an environment variable. --- .../fluxnet2015_tci.py | 106 ++++-------------- 1 file changed, 23 insertions(+), 83 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index d8e761e37b..9ff277e068 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -16,17 +16,18 @@ # Climate model data typically doesn't include leap days, so it is excluded from observations by default SKIP_LEAP=True -# For the finer resolution data, what percentage at the finer resoultion should pass for the daily data? +# For the finer resolution data, what fraction at the finer resoultion should pass QC to use the daily value? DAILY_QC_THRESH=0.8 -# TODO: This needs to be evaluated PER season (i.e. DJF for 2018, DJF for 2019 ...) not on all days in all DJF's, for example -MIN_DAYS_SEASON=80 +# Number of days in an individual season to require +MIN_DAYS_SEASON=1 -# For all seasons in the analysis period, how many total days per site? -MIN_DAYS_SITE=450 +# For all seasons (i.e. DJF 2000 + DJF 2001 ... DJF XXXX) in the analysis period, how many total days per site? +MIN_DAYS_SITE=10 # Pattern to use for searching for fluxnet files -FILENAME_PATTERN='AMF_*_DD_*.csv' +#FILENAME_PATTERN='AMF_*_DD_*.csv' +FILENAME_PATTERN='FLX_*_DD_*.csv' def get_season_start_end(s,refdate): @@ -49,7 +50,7 @@ def get_season_start_end(s,refdate): return start, end -# The script expects four arguments: +# The script expects five arguments: # raw_fluxnet_dir = Full path to the directory where the raw FLUXNET files are stored # sfc_flux_varname = Field name in the raw_fluxnet_file to use when computing TCI # soil_varname = Field name in the raw_fluxnet_file to use when computing TCI @@ -60,6 +61,7 @@ def get_season_start_end(s,refdate): print("sfc_flux_varname") print("soil_varname") print("season (DJF, MAM, JJA, or SON)") + print("fluxnet_station_metadata_file") sys.exit(1) # Store command line arguments @@ -69,6 +71,7 @@ def get_season_start_end(s,refdate): soil_varname = varCLM = sys.argv[3] soil_qc = soil_varname+'_QC' season = sys.argv[4] +fluxnetmeta = sys.argv[5] if season not in ['DJF','MAM','JJA','SON']: print("ERROR: UNRECOGNIZED SEASON IN tci_from_cesm.py") sys.exit(1) @@ -78,7 +81,11 @@ def get_season_start_end(s,refdate): # Read station information from static file, because the raw FLUXNET data does not contain # required metadata like station latitude/longitude that is required by MET. -sd = pd.read_csv('fluxnetstations.csv') +if not os.path.exists(fluxnetmeta): + print("ERROR! FLUXNET METADATA FILE NOT PRESENT.") + sys.exit(1) +else: + sd = pd.read_csv(fluxnetmeta) print("Starting Terrestrial Coupling Index Calculation for: "+season) # Locate files at the input directory @@ -139,10 +146,10 @@ def get_season_start_end(s,refdate): # Length of all data alldays = len(df) + print("NUMBER OF DAYS AT THIS SITE: %04d" % (alldays)) # Only save data with quality above the threshold and reset the index df = df[(df[sfc_qc].astype('float')>=DAILY_QC_THRESH)&(df[soil_qc].astype('float')>=DAILY_QC_THRESH)].reset_index() - print("INFO: DISCARDED %04d DAYS OF LOW QUALITY DATA FOR ALL SEASONS AT %s" % (int(alldays)-int(len(df)),stn)) # Add datetime column @@ -190,20 +197,16 @@ def get_season_start_end(s,refdate): print("REMOVING "+season+" ENDING %s" % (year)) df = df[(df['datetime'](end-datetime.timedelta(days=1)))] - # TODO: Double check each season has at least 80 days? - print("INFO: USING %04d DAYS OF DATA AT %s FOR %s" % (int(len(df)),stn,season)) - # Double check there are sufficient days at this site for all seasons if len(df) Date: Wed, 10 Apr 2024 19:16:06 +0000 Subject: [PATCH 19/31] Support for environment variables or default options for filtering and filename patterns, DEBUG mode added and set to False by default, adjustment of print statements for logging, and refactoring filtering of stations to ensure we don't process a file that we shouldn't by better coupling of filenames and stations. --- .../fluxnet2015_tci.py | 121 ++++++++++++------ 1 file changed, 82 insertions(+), 39 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index 9ff277e068..4c0dcddaf7 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -13,21 +13,28 @@ import sys import xarray as xr +DEBUG=False + # Climate model data typically doesn't include leap days, so it is excluded from observations by default -SKIP_LEAP=True +SKIP_LEAP = os.getenv('FLUXNET_SKIP_LEAP_DAYS',True) +SKIP_LEAP = bool(SKIP_LEAP) # For the finer resolution data, what fraction at the finer resoultion should pass QC to use the daily value? -DAILY_QC_THRESH=0.8 +DAILY_QC_THRESH = os.getenv('FLUXNET_HIGHRES_QC_THRESH',0.8) +DAILY_QC_THRESH = float(DAILY_QC_THRESH) # Number of days in an individual season to require -MIN_DAYS_SEASON=1 +MIN_DAYS_SEASON = os.getenv('FLUXNET_MIN_DAYS_PER_SEASON',1) +MIN_DAYS_SEASON = int(MIN_DAYS_SEASON) # For all seasons (i.e. DJF 2000 + DJF 2001 ... DJF XXXX) in the analysis period, how many total days per site? -MIN_DAYS_SITE=10 +MIN_DAYS_SITE = os.getenv('FLUXNET_MIN_DAYS_PER_SITE_ALL_SEASONS',10) +MIN_DAYS_SITE = int(MIN_DAYS_SITE) # Pattern to use for searching for fluxnet files -#FILENAME_PATTERN='AMF_*_DD_*.csv' -FILENAME_PATTERN='FLX_*_DD_*.csv' +#FILENAME_PATTERN = os.getenv('FLUXNET_RAW_FILENAME_PATTERN','AMF_*_DD_*.csv') +FILENAME_PATTERN = os.getenv('FLUXNET_RAW_FILENAME_PATTERN','FLX_*_DD_*.csv') +FILENAME_PATTERN = str(FILENAME_PATTERN) def get_season_start_end(s,refdate): @@ -73,7 +80,7 @@ def get_season_start_end(s,refdate): season = sys.argv[4] fluxnetmeta = sys.argv[5] if season not in ['DJF','MAM','JJA','SON']: - print("ERROR: UNRECOGNIZED SEASON IN tci_from_cesm.py") + print("ERROR: UNRECOGNIZED SEASON IN fluxnet2015_tci.py") sys.exit(1) # Dictionary mapping of which months go with which seasons @@ -99,29 +106,51 @@ def get_season_start_end(s,refdate): print("ERROR! NO FLUXNET DATA FOUND MATCHING FILE PATTERN "+FILENAME_PATTERN) sys.exit(1) else: - print("FOUND FLUXNET FILES FOR STATIONS:") - [print(x) for x in fn_stations] + if DEBUG: + print("FOUND FLUXNET FILES FOR STATIONS:") + print(fn_stations) + else: + print("fluxnet2015_tci.py INFO: FOUND DATA FOR %04d FLUXNET SITES." % (int(len(fn_stations)))) + + # Let's try using a dictionary where the key is the site name and the value is the site file + # in an effort to keep the site ID's and filenames aligned for now + file_info = {station:file for station,file in tuple(zip(fn_stations,fn_file_list))} # Loop over all stations we have data for and ensure we have the required metadata -keep_stations = [] -for s in fn_stations: - if not sd['station'].astype('str').str.contains(s).any(): - print("WARNING! EXCLUDING SITE %s, NO METADATA FOUND IN fluxnetstations.csv" % (s)) +# and required variables in the data file. +# If we don't have the metadata or required variables, +# remove the station and file from the analysis +dflist = [] +discard = [] +allfiles = len(file_info) +for station,stationfile in file_info.items(): + if not sd['station'].astype('str').str.contains(station).any(): + if DEBUG: + print("WARNING! EXCLUDING SITE %s, NO METADATA FOUND IN fluxnetstations.csv" % (station)) + discard.append(station) + df = pd.read_csv(stationfile) + if (sfc_flux_varname in df.columns and soil_varname in df.columns and soil_qc in df.columns): + dflist.append(df) else: - keep_stations.append(s) -fn_stations = keep_stations + if DEBUG: + print("WARNING! EXCLUDING SITE %s, MISSING ONE OR MORE REQUIRED VARIABLES." % (station)) + discard.append(station) + +# Reset the file info +final_files = {station:stationfile for station,stationfile in file_info.items() if station not in discard} +print("fluxnet2015_tci.py INFO: DISCARDED %04d SITES DUE TO MISSING METADATA OR VARIABLES." % (int(allfiles-len(final_files)))) # Create a MET dataframe metdf = pd.DataFrame(columns=['typ','sid','vld','lat','lon','elv','var','lvl','hgt','qc','obs']) -metdf['sid'] = fn_stations +metdf['sid'] = final_files.keys() metdf['typ'] = ['ADPSFC']*len(metdf) metdf['elv'] = [10]*len(metdf) metdf['lvl'] = [10]*len(metdf) metdf['var'] = ['TCI']*len(metdf) metdf['qc'] = ['NA']*len(metdf) metdf['hgt'] = [0]*len(metdf) -metdf['lat'] = [sd[sd['station']==s]['lat'].values[0] for s in fn_stations] -metdf['lon'] = [sd[sd['station']==s]['lon'].values[0] for s in fn_stations] +metdf['lat'] = [sd[sd['station']==s]['lat'].values[0] for s in final_files.keys()] +metdf['lon'] = [sd[sd['station']==s]['lon'].values[0] for s in final_files.keys()] # Check and see what the length of metdf is here. If it is empty/zero, no FLUXNET data were found. if len(metdf)==0: @@ -129,28 +158,30 @@ def get_season_start_end(s,refdate): "PLEASE RECONFIGURE AND TRY AGAIN.") sys.exit(1) -# TODO: DEBUG -print(metdf) - -# Open each of the fluxnet files as a pandas dataframe. This will only read the TIMESTAMP and data variable columns. -# This assumes "DD" files (daily) are used. -dflist = [pd.read_csv(x,usecols=['TIMESTAMP',sfc_flux_varname,sfc_qc,soil_varname,soil_qc]) for x in fn_file_list] - # Because the time record for each station is not the same, the dataframes cannot be merged. -# Given the goal is a single value for each site for all dates that fall in a season, +# Since the goal is a single value for each site for all dates that fall in a season, # the dataframes can remain separate. +for df,stn in tuple(zip(dflist,final_files.keys())): -for df,stn in tuple(zip(dflist,fn_stations)): - - print("PROCESSING STATION: %s" % (stn)) + if DEBUG: + print("PROCESSING STATION: %s" % (stn)) # Length of all data alldays = len(df) - print("NUMBER OF DAYS AT THIS SITE: %04d" % (alldays)) + if DEBUG: + print("NUMBER OF DAYS AT THIS SITE: %04d" % (alldays)) # Only save data with quality above the threshold and reset the index df = df[(df[sfc_qc].astype('float')>=DAILY_QC_THRESH)&(df[soil_qc].astype('float')>=DAILY_QC_THRESH)].reset_index() - print("INFO: DISCARDED %04d DAYS OF LOW QUALITY DATA FOR ALL SEASONS AT %s" % (int(alldays)-int(len(df)),stn)) + if DEBUG: + print("DISCARDED %04d DAYS OF LOW QUALITY DATA FOR ALL SEASONS AT %s" % (int(alldays)-int(len(df)),stn)) + + # Double check there's any valid data here + if not len(df) > 0: + if DEBUG: + print("WARNING! NO DATA LEFT AFTER QC FILTERING.") + metdf.loc[metdf['sid']==stn,'qc'] = '-9999' + continue # Add datetime column df['datetime'] = pd.to_datetime(df['TIMESTAMP'],format='%Y%m%d') @@ -166,6 +197,13 @@ def get_season_start_end(s,refdate): # Subset by the requested season df = df[df['season']==season] + + # Double check there's any valid data here + if not len(df) > 0: + if DEBUG: + print("WARNING! NO DATA FOR REQUESTED SEASON.") + metdf.loc[metdf['sid']==stn,'qc'] = '-9999' + continue # If the season is DJF, remove leap days from February if requested if season=='DJF' and SKIP_LEAP: @@ -181,7 +219,8 @@ def get_season_start_end(s,refdate): while start <= limit: year = end.strftime('%Y') ndays = len(df[(df['datetime']>=start) & (df['datetime']<=(end-datetime.timedelta(days=1)))]) - print("FOR "+season+" ENDING %s FOUND %04d DAYS." % (year,ndays)) + if DEBUG: + print("FOR "+season+" ENDING %s FOUND %04d DAYS." % (year,ndays)) # Store the season ending years where there are not enough days if ndays < MIN_DAYS_SEASON: @@ -194,17 +233,20 @@ def get_season_start_end(s,refdate): # Now actually remove the data we don't want to use for year in badyrs: start, end = get_season_start_end(season,datetime.datetime(int(year),1,1,0,0,0)) - print("REMOVING "+season+" ENDING %s" % (year)) + if DEBUG: + print("REMOVING "+season+" ENDING %s" % (year)) df = df[(df['datetime'](end-datetime.timedelta(days=1)))] # Double check there are sufficient days at this site for all seasons if len(df) Date: Wed, 10 Apr 2024 19:17:56 +0000 Subject: [PATCH 20/31] Makes DEBUG an env var for config via metplus wrappers. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index 4c0dcddaf7..a4175e072f 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -13,7 +13,9 @@ import sys import xarray as xr -DEBUG=False +# Extra debugging +DEBUG = os.getenv('FLUXNET_DEBUG',False) +DEBUG = bool(DEBUG) # Climate model data typically doesn't include leap days, so it is excluded from observations by default SKIP_LEAP = os.getenv('FLUXNET_SKIP_LEAP_DAYS',True) From db877e0a6c01d3e7efb763fc0b00c9017082c7f3 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Wed, 10 Apr 2024 19:19:02 +0000 Subject: [PATCH 21/31] Reorganization of config file, adds environment variables, and updates comments for use case changes. --- ...PointStat_fcstCESM_obsFLUXNET2015_TCI.conf | 108 +++++++++--------- 1 file changed, 52 insertions(+), 56 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf index f9059a7aef..fcec3f622b 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf @@ -40,108 +40,93 @@ CUSTOM_LOOP_LIST = DJF,MAM,JJA,SON LOG_LEVEL=DEBUG ############################################################ -# PyEmbedIngest Settings +# PyEmbedIngest Settings for CESM forecast data # https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#pyembedingest +# User can modify which flux and soil variable is extracted from the CESM files ############################################################ +CESM_CAM_VAR = LHFLX +CESM_CLM_VAR = SOILWATER_10CM +CESM_CAM_FILE_PATH = {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.cam.h1.1979-83_CIvars.nc +CESM_CLM_FILE_PATH = {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.clm2.h1.1979-83_SoilWater10cm.nc + PY_EMBED_INGEST_1_OUTPUT_DIR = PY_EMBED_INGEST_1_OUTPUT_TEMPLATE = {OUTPUT_BASE}/regrid_data_plane_{custom}.nc -############################################################ -# The last argument is the Latent Heat Flux. -# User can change it to use any other variable present in CESM files. -############################################################ - -PY_EMBED_INGEST_1_SCRIPT = {PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.cam.h1.1979-83_CIvars.nc LHFLX {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.clm2.h1.1979-83_SoilWater10cm.nc SOILWATER_10CM {custom} +PY_EMBED_INGEST_1_SCRIPT = {PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py {CESM_CAM_FILE_PATH} {CESM_CAM_VAR} {CESM_CLM_FILE_PATH} {CESM_CLM_VAR} {custom} PY_EMBED_INGEST_1_TYPE = NUMPY -#PY_EMBED_INGEST_1_OUTPUT_GRID = G129 PY_EMBED_INGEST_1_OUTPUT_GRID = "latlon 288 192 -90.0 -0.0 0.9375 1.25" ############################################################ -# PointStat Settings -# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#pointstat +# Python embedding settings for using FLUXNET2015 obs data with PointStat. +# User can change the variables used from FLUXNET here to compute TCI. +# The FLUXNET_* config items are also used for PlotPointObs below. ############################################################ +FLUXNET_DATA_DIR = /home/dadriaan/projects/landsfc/data/feature_2388_Update_TCI/obs/fluxnet +FLUXNET_LAT_HEAT_VAR = LE_F_MDS +FLUXNET_SOIL_MOIST_VAR = SWC_F_MDS_1 +FLUXNET_OBS_METADATA_FILE={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnetstations.csv -FCST_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY +OBS_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py {FLUXNET_DATA_DIR} {FLUXNET_LAT_HEAT_VAR} {FLUXNET_SOIL_MOIST_VAR} {custom} {FLUXNET_OBS_METADATA_FILE} ############################################################ -# The default variables for the FLUXNET2015 observations id 10CM Soil Moisture -# and Latent Heat Flux. User can change them in the python Embedding script -# fluxnet_tci.py in the PointStat_fcstCESM_obsFLUXNET2015_TCI -# directory. +# PointStat Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#pointstat ############################################################ -FLUXNET_OBS_METADATA_FILE={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnetstations.csv -OBS_POINT_STAT_INPUT_TEMPLATE = PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py /home/dadriaan/projects/landsfc/data/feature_2388_Update_TCI/obs/fluxnet LE_F_MDS SWC_F_MDS_1 {custom} {FLUXNET_OBS_METADATA_FILE} - -POINT_STAT_OUTPUT_DIR = {OUTPUT_BASE}/PointStat ############################################################ # Field Info # https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#field-info ############################################################ -POINT_STAT_ONCE_PER_FIELD = False - -FCST_POINT_STAT_INPUT_DIR = {OUTPUT_BASE}/ -FCST_POINT_STAT_INPUT_TEMPLATE = regrid_data_plane_{custom}.nc -FCST_POINT_STAT_VAR1_LEVELS = OBS_POINT_STAT_VAR1_NAME = TCI -OBS_POINT_STAT_VAR1_LEVELS = L0 +OBS_POINT_STAT_VAR1_LEVELS = L0 +OBS_POINT_STAT_WINDOW_BEGIN = -1000000000 +OBS_POINT_STAT_WINDOW_END = 2000000000 FCST_POINT_STAT_VAR1_NAME = TCI_10cm_soil_depth FCST_POINT_STAT_VAR1_LEVELS = Z10 -BOTH_POINT_STAT_VAR1_THRESH = +FCST_POINT_STAT_INPUT_DIR = {OUTPUT_BASE} +FCST_POINT_STAT_INPUT_TEMPLATE = regrid_data_plane_{custom}.nc ############################################################ -# PointStat Settings -# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#pointstat +# General PointStat Settings ############################################################ -LOG_POINT_STAT_VERBOSITY = 2 - -POINT_STAT_CONFIG_FILE ={PARM_BASE}/met_config/PointStatConfig_wrapped - +POINT_STAT_CONFIG_FILE = {PARM_BASE}/met_config/PointStatConfig_wrapped +POINT_STAT_DESC = TCI POINT_STAT_INTERP_TYPE_METHOD = NEAREST POINT_STAT_INTERP_TYPE_WIDTH = 1 - +POINT_STAT_MASK_GRID = FULL +POINT_STAT_MASK_POLY = +POINT_STAT_MASK_SID = +POINT_STAT_MESSAGE_TYPE = ADPSFC +POINT_STAT_OBS_VALID_BEG = 19790101_000000 +POINT_STAT_OBS_VALID_END = 20130101_000000 +POINT_STAT_OFFSETS = 0 +POINT_STAT_ONCE_PER_FIELD = False +POINT_STAT_OUTPUT_DIR = {OUTPUT_BASE}/PointStat POINT_STAT_OUTPUT_FLAG_CTC = BOTH POINT_STAT_OUTPUT_FLAG_MPR = BOTH POINT_STAT_OUTPUT_FLAG_CNT = BOTH - -OBS_POINT_STAT_WINDOW_BEGIN = -1000000000 -OBS_POINT_STAT_WINDOW_END = 2000000000 - -POINT_STAT_OFFSETS = 0 - -MODEL = CESM - -POINT_STAT_DESC = TCI -OBTYPE = - +POINT_STAT_OUTPUT_PREFIX = {custom} POINT_STAT_REGRID_TO_GRID = NONE POINT_STAT_REGRID_METHOD = BILIN POINT_STAT_REGRID_WIDTH = 2 -POINT_STAT_OBS_VALID_BEG = 19790101_000000 -POINT_STAT_OBS_VALID_END = 20130101_000000 - -POINT_STAT_MASK_GRID = FULL -POINT_STAT_MASK_POLY = -POINT_STAT_MASK_SID = - -POINT_STAT_MESSAGE_TYPE = ADPSFC - -POINT_STAT_OUTPUT_PREFIX = {custom} +LOG_POINT_STAT_VERBOSITY = 2 +MODEL = CESM +OBTYPE = FLUXNET ############################################################ # PlotPointObs Settings # https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#plotpointobs ############################################################ -PLOT_POINT_OBS_INPUT_TEMPLATE =PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py /home/dadriaan/projects/landsfc/data/feature_2388_Update_TCI/obs/fluxnet LE_F_MDS SWC_F_MDS_1 {custom} {FLUXNET_OBS_METADATA_FILE} +PLOT_POINT_OBS_INPUT_TEMPLATE =PYTHON_NUMPY={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py {FLUXNET_DATA_DIR} {FLUXNET_LAT_HEAT_VAR} {FLUXNET_SOIL_MOIST_VAR} {custom} {FLUXNET_OBS_METADATA_FILE} -PLOT_POINT_OBS_GRID_INPUT_DIR = {OUTPUT_BASE}/ +PLOT_POINT_OBS_GRID_INPUT_DIR = {OUTPUT_BASE} PLOT_POINT_OBS_GRID_INPUT_TEMPLATE = regrid_data_plane_{custom}.nc PLOT_POINT_OBS_OUTPUT_DIR = {OUTPUT_BASE}/plot_point_obs @@ -170,3 +155,14 @@ PLOT_POINT_OBS_POINT_DATA = colorbar_flag = FALSE; } } + +############################################################ +# FLUXNET Python Embedding Script options +############################################################ +[user_env_vars] +FLUXNET_SKIP_LEAP_DAYS=True +FLUXNET_HIGHRES_QC_THRESH=0.8 +FLUXNET_MIN_DAYS_PER_SEASON=1 +FLUXNET_MIN_DAYS_PER_SITE_ALL_SEASONS=10 +FLUXNET_RAW_FILENAME_PATTERN=FLX_*_DD_*.csv +FLUXNET_DEBUG=False From 757f7a6f7af713fa28494d62eee4a5852ffccc23 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Wed, 10 Apr 2024 19:44:51 +0000 Subject: [PATCH 22/31] Updates to documentation. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI.py | 76 ++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index 6ae0ba9f86..b3a198169b 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -99,20 +99,92 @@ # Python Embedding # ---------------- # -# This use case uses a Python embedding script to read input data +# This use case uses a Python embedding script to read both the forecast and observation data, in order to compute TCI, +# which is the diagnostic that is being verified by MET using PointStat. The CESM forecast data is read using # # parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py # # .. highlight:: python # .. literalinclude:: ../../../../parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py # +# The user can control all arguments to this script via the METplus use case configuration file using the following config entries +# .. list-table:: CESM Python Embedding Command Line Arguments +# :rwidths: 25 25 50 +# :header-rows: 1 +# +# * - Name +# - Default +# - Description +# * - CESM_CAM_VAR +# - LHFLX +# - The Community Atmosphere Model variable to use +# * - CESM_CLM_VAR +# - SOILWATER_10CM +# - The Community Land Model variable to use +# * - CESM_CAM_FILE_PATH +# - +# - The absolute path to the Community Atmosphere Model netcdf file +# * - CESM_CLM_FILE_PATH +# - +# - The absolute path to the Community Land Model netcdf file +# +# The raw FLUXNET2015 SUBSET data are read using +# # parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py # # .. highlight:: python # .. literalinclude:: ../../../../parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py # +# The user can control all command line arguments to this script via METplus config entries +# .. list-table:: FLUXNET Python Embedding Command Line Arguments +# :rwidths: 25 75 +# :header-rows: 1 +# +# * - Name +# - Default +# - Description +# * - FLUXNET_DATA_DIR +# - +# - The directory containing raw FLUXNET CSV files +# * - FLUXNET_LAT_HEAT_VAR +# - LE_F_MDS +# - The latent heat variable to use from FLUXNET to compute TCI +# * - FLUXNET_SOIL_MOIST_VAR +# - SWC_F_MDS_1 +# - The soil moisture variable to use from FLUXNET to compute TCI +# * - FLUXNET_OBS_METADATA_FILE +# - +# - The absolute path to the fluxnetstations.csv metadata file included with the use case +# +# and for data filtering options via METplus config entries +# .. list-table:: FLUXNET Python Embedding Data Filtering Options +# :rwidths: 25 25 50 +# :header-rows: 1 +# +# * - Name +# - Default +# - Description +# * - FLUXNET_SKIP_LEAP_DAYS +# - True +# - Skip 29 February days FLUXNET observations +# * - FLUXNET_HIGHRES_QC_THRESH +# - 0.8 +# - Fraction of higher-resolution data that passed quality control to include this day +# * - FLUXNET_MIN_DAYS_PER_SEASON +# - 1 +# - Minimum number of days to include in individual seasons at each site +# * - FLUXNET_MIN_DAYS_PER_SITE +# - 10 +# - Minimum number of days for all seasons at each site +# * - FLUXNET_RAW_FILENAME_PATTERN +# - FLX_*_DD_*.csv +# - A filename pattern matching the filename template of the raw FLUXNET CSV files +# * - FLUXNET_DEBUG +# - False +# - Turn on debugging print statements +# # Both of the above Python embedding scripts compute TCI using the `calc_tci()` function in METcalcpy. See the METcalcpy -# documentation for more information. +# documentation for more information # ############################################################################## From 51d03fd53dce0836f4b0e358800a296ac12a1bc9 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Wed, 10 Apr 2024 20:02:42 +0000 Subject: [PATCH 23/31] Fixes tables. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index b3a198169b..7351f5c557 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -108,8 +108,9 @@ # .. literalinclude:: ../../../../parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py # # The user can control all arguments to this script via the METplus use case configuration file using the following config entries +# # .. list-table:: CESM Python Embedding Command Line Arguments -# :rwidths: 25 25 50 +# :widths: 25 25 50 # :header-rows: 1 # # * - Name @@ -136,8 +137,9 @@ # .. literalinclude:: ../../../../parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py # # The user can control all command line arguments to this script via METplus config entries +# # .. list-table:: FLUXNET Python Embedding Command Line Arguments -# :rwidths: 25 75 +# :widths: 25 25 50 # :header-rows: 1 # # * - Name @@ -157,8 +159,9 @@ # - The absolute path to the fluxnetstations.csv metadata file included with the use case # # and for data filtering options via METplus config entries +# # .. list-table:: FLUXNET Python Embedding Data Filtering Options -# :rwidths: 25 25 50 +# :widths: 25 25 50 # :header-rows: 1 # # * - Name From e1a640c451806b07b2dd7652d3eddc588893e945 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Wed, 10 Apr 2024 20:13:22 +0000 Subject: [PATCH 24/31] Adds table of contents to the top for users to click on. --- .../land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index 7351f5c557..5795cadced 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -5,6 +5,12 @@ model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf """ +############################################################################## +# .. contents:: +# :depth: 1 +# :local: +# :backlinks: none + ############################################################################## # Scientific Objective # -------------------- From 063088004174eff197dd109f5444515914e7f7c3 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Wed, 10 Apr 2024 21:47:47 +0000 Subject: [PATCH 25/31] Updates use case documentation file. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI.py | 171 ++++++++++-------- 1 file changed, 94 insertions(+), 77 deletions(-) diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index 5795cadced..71c35121c9 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -17,7 +17,12 @@ # This use case ingests two CESM (CAM and CLM) files and raw FLUXNET2015 data. # The use case calculates the Terrestrial Coupling Index (TCI) from the CESM forecasts and FLUXNET observations. # Utilizing Python embedding, this use case taps into a new vital observation dataset and compares it to CESM forecasts of TCI. -# Finally, it will generate plots of model forecast TCI overlaid with TCI observations at FLUXNET sites.. +# Finally, it will generate plots of model forecast TCI overlaid with TCI observations at FLUXNET sites. +# +# The reference for the Terrestrial Coupling Index calculation is as follows: +# +# Dirmeyer, P. A., 2011: The terrestrial segment of soil moisture-climate coupling. *Geophys. Res. Lett.*, **38**, L16702, doi: 10.1029/2011GL048268. +# ############################################################################## # Datasets @@ -106,94 +111,105 @@ # ---------------- # # This use case uses a Python embedding script to read both the forecast and observation data, in order to compute TCI, -# which is the diagnostic that is being verified by MET using PointStat. The CESM forecast data is read using +# which is the diagnostic that is being verified by MET using PointStat. The CESM forecast data is read using: # # parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py # # .. highlight:: python # .. literalinclude:: ../../../../parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py # -# The user can control all arguments to this script via the METplus use case configuration file using the following config entries -# -# .. list-table:: CESM Python Embedding Command Line Arguments -# :widths: 25 25 50 -# :header-rows: 1 -# -# * - Name -# - Default -# - Description -# * - CESM_CAM_VAR -# - LHFLX -# - The Community Atmosphere Model variable to use -# * - CESM_CLM_VAR -# - SOILWATER_10CM -# - The Community Land Model variable to use -# * - CESM_CAM_FILE_PATH -# - -# - The absolute path to the Community Atmosphere Model netcdf file -# * - CESM_CLM_FILE_PATH -# - -# - The absolute path to the Community Land Model netcdf file -# -# The raw FLUXNET2015 SUBSET data are read using +# The user can control all arguments to this script via the METplus use case configuration file using the following config entries: +# +# .. glossary:: +# +# CESM_CAM_VAR +# The CESM Community Atmosphere Model variable to use for computing TCI +# +# | *Default:* LHFLX +# +# CESM_CLM_VAR +# The CESM Community Land Model variable to use for computing TCI +# +# | *Default:* SOILWATER_10CM +# +# CESM_CAM_FILE_PATH +# The absolute path to the CESM Community Atmosphere Model netcdf file +# +# | *Default:* +# +# CESM_CLM_FILE_PATH +# The absolute path to the CESM Community Land Model netcdf file +# +# | *Default:* +# +# The raw FLUXNET2015 SUBSET data are read using: # # parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py # # .. highlight:: python # .. literalinclude:: ../../../../parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py # -# The user can control all command line arguments to this script via METplus config entries -# -# .. list-table:: FLUXNET Python Embedding Command Line Arguments -# :widths: 25 25 50 -# :header-rows: 1 -# -# * - Name -# - Default -# - Description -# * - FLUXNET_DATA_DIR -# - -# - The directory containing raw FLUXNET CSV files -# * - FLUXNET_LAT_HEAT_VAR -# - LE_F_MDS -# - The latent heat variable to use from FLUXNET to compute TCI -# * - FLUXNET_SOIL_MOIST_VAR -# - SWC_F_MDS_1 -# - The soil moisture variable to use from FLUXNET to compute TCI -# * - FLUXNET_OBS_METADATA_FILE -# - -# - The absolute path to the fluxnetstations.csv metadata file included with the use case -# -# and for data filtering options via METplus config entries -# -# .. list-table:: FLUXNET Python Embedding Data Filtering Options -# :widths: 25 25 50 -# :header-rows: 1 -# -# * - Name -# - Default -# - Description -# * - FLUXNET_SKIP_LEAP_DAYS -# - True -# - Skip 29 February days FLUXNET observations -# * - FLUXNET_HIGHRES_QC_THRESH -# - 0.8 -# - Fraction of higher-resolution data that passed quality control to include this day -# * - FLUXNET_MIN_DAYS_PER_SEASON -# - 1 -# - Minimum number of days to include in individual seasons at each site -# * - FLUXNET_MIN_DAYS_PER_SITE -# - 10 -# - Minimum number of days for all seasons at each site -# * - FLUXNET_RAW_FILENAME_PATTERN -# - FLX_*_DD_*.csv -# - A filename pattern matching the filename template of the raw FLUXNET CSV files -# * - FLUXNET_DEBUG -# - False -# - Turn on debugging print statements -# -# Both of the above Python embedding scripts compute TCI using the `calc_tci()` function in METcalcpy. See the METcalcpy -# documentation for more information +# The user can control all command line arguments to this script via METplus config entries: +# +# .. glossary:: +# +# FLUXNET_DATA_DIR +# The directory containing raw FLUXNET CSV files +# +# | *Default:* +# +# FLUXNET_LAT_HEAT_VAR +# The FLUXNET surface latent heat flux variable name to use for computing TCI +# +# | *Default:* LE_F_MDS +# +# FLUXNET_SOIL_MOIST_VAR +# The FLUXNET soil moisture variable name to use for computing TCI +# +# | *Default:* SWC_F_MDS_1 +# +# FLUXNET_OBS_METADATA_FILE +# The absolute path to the fluxnetstations.csv metadata file included with the use case +# +# | *Default:* +# +# and for data filtering options, via METplus config entries: +# +# .. glossary:: +# +# FLUXNET_SKIP_LEAP_DAYS +# Skip FLUXNET observations from 29 February days +# +# | *Default:* True +# +# FLUXNET_HIGHRES_QC_THRESH +# The fraction of higher temporal resolution FLUXNET data required to have +# passed quality control in order to use the daily data. +# +# | *Default:* 0.8 +# +# FLUXNET_MIN_DAYS_PER_SEASON +# The minimum number of days to include in individual seasons at each site +# +# | *Default:* 1 +# +# FLUXNET_MIN_DAYS_PER_SITE +# The minimum number of days for all seasons at each site +# +# | *Default:* 10 +# +# FLUXNET_RAW_FILENAME_PATTERN +# A filename pattern matching the template of the raw FLUXNET CSV files +# +# | *Default:* FLX_*_DD_*.csv +# +# FLUXNET_DEBUG +# Turn on additional print statements from the Python embedding script +# +# | *Default:* False +# +# Both of the above Python embedding scripts compute TCI using the ``calc_tci()`` function in METcalcpy. See the METcalcpy +# documentation for more information: https://metcalcpy.readthedocs.io/en/latest/index.html. # ############################################################################## @@ -206,6 +222,7 @@ # run_metplus.py /path/to/METplus/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf /path/to/user_system.conf # # See :ref:`running-metplus` for more information. +# ############################################################################## # Expected Output From 47cc8a00f944b87c056cbb8f5e8ef0baef47f79d Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Thu, 11 Apr 2024 22:42:56 +0000 Subject: [PATCH 26/31] Updated config file with obs and fcst subirectories in the path. --- .../land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf index fcec3f622b..7ed802462f 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf @@ -47,8 +47,8 @@ LOG_LEVEL=DEBUG CESM_CAM_VAR = LHFLX CESM_CLM_VAR = SOILWATER_10CM -CESM_CAM_FILE_PATH = {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.cam.h1.1979-83_CIvars.nc -CESM_CLM_FILE_PATH = {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.clm2.h1.1979-83_SoilWater10cm.nc +CESM_CAM_FILE_PATH = {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fcst/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.cam.h1.1979-83_CIvars.nc +CESM_CLM_FILE_PATH = {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fcst/f.e21.FHIST.f09_f09_mg17.CESM2-CLM45physics.002.clm2.h1.1979-83_SoilWater10cm.nc PY_EMBED_INGEST_1_OUTPUT_DIR = PY_EMBED_INGEST_1_OUTPUT_TEMPLATE = {OUTPUT_BASE}/regrid_data_plane_{custom}.nc @@ -63,7 +63,7 @@ PY_EMBED_INGEST_1_OUTPUT_GRID = "latlon 288 192 -90.0 -0.0 0.9375 1.25" # User can change the variables used from FLUXNET here to compute TCI. # The FLUXNET_* config items are also used for PlotPointObs below. ############################################################ -FLUXNET_DATA_DIR = /home/dadriaan/projects/landsfc/data/feature_2388_Update_TCI/obs/fluxnet +FLUXNET_DATA_DIR = {INPUT_BASE}/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/obs FLUXNET_LAT_HEAT_VAR = LE_F_MDS FLUXNET_SOIL_MOIST_VAR = SWC_F_MDS_1 FLUXNET_OBS_METADATA_FILE={PARM_BASE}/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnetstations.csv From b867dacdd2135fa77584ab2ab4c8b13324bd9c28 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Fri, 12 Apr 2024 15:38:26 +0000 Subject: [PATCH 27/31] Switches to using metplotpy_env to get metcalcpy dependency. --- internal/tests/use_cases/all_use_cases.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/tests/use_cases/all_use_cases.txt b/internal/tests/use_cases/all_use_cases.txt index 8aed023e48..daef1a124e 100644 --- a/internal/tests/use_cases/all_use_cases.txt +++ b/internal/tests/use_cases/all_use_cases.txt @@ -88,7 +88,7 @@ Category: data_assimilation Category: land_surface -0::PointStat_fcstCESM_obsFLUXNET2015_TCI:: model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf:: py_embed +0::PointStat_fcstCESM_obsFLUXNET2015_TCI:: model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.conf:: metplotpy_env, py_embed Category: marine_and_cryosphere From 05dcedb8a27bb6c206a931c799fa55426b35973e Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Mon, 15 Apr 2024 14:09:59 +0000 Subject: [PATCH 28/31] Adds filtering based on missing data values. --- .../fluxnet2015_tci.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index a4175e072f..f4d9a4829f 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -131,7 +131,7 @@ def get_season_start_end(s,refdate): print("WARNING! EXCLUDING SITE %s, NO METADATA FOUND IN fluxnetstations.csv" % (station)) discard.append(station) df = pd.read_csv(stationfile) - if (sfc_flux_varname in df.columns and soil_varname in df.columns and soil_qc in df.columns): + if (sfc_flux_varname in df.columns and soil_varname in df.columns and soil_qc in df.columns and sfc_qc in df.columns): dflist.append(df) else: if DEBUG: @@ -173,6 +173,16 @@ def get_season_start_end(s,refdate): if DEBUG: print("NUMBER OF DAYS AT THIS SITE: %04d" % (alldays)) + # Do some checking for missing data. FLUXNET says that -9999 is usecd for missing data. + # Both the soil and surface variable must be present to compute TCI, so we only want + # to retain days where both are not missing. + withmiss = len(df) + df = df[(df[sfc_flux_varname]!=-9999.) & (df[soil_varname]!=-9999.)] +# if DEBUG: + print("DISCARDED %04d DAYS WITH MISSING DATA." % (int(withmiss)-int(len(df)))) + missdiff = int(withmiss)-int(len(df)) + print("%3.2f" % ((float(missdiff)/float(withmiss))*100.0)) + # Only save data with quality above the threshold and reset the index df = df[(df[sfc_qc].astype('float')>=DAILY_QC_THRESH)&(df[soil_qc].astype('float')>=DAILY_QC_THRESH)].reset_index() if DEBUG: From 6f74ea6ba58fb5c7ac01c0acb65315d8f9708201 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Mon, 15 Apr 2024 15:09:35 +0000 Subject: [PATCH 29/31] Finishing touches to debug statements for testing. --- .../fluxnet2015_tci.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index f4d9a4829f..c81a8c3c01 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -166,6 +166,7 @@ def get_season_start_end(s,refdate): for df,stn in tuple(zip(dflist,final_files.keys())): if DEBUG: + print("==============================") print("PROCESSING STATION: %s" % (stn)) # Length of all data @@ -176,18 +177,23 @@ def get_season_start_end(s,refdate): # Do some checking for missing data. FLUXNET says that -9999 is usecd for missing data. # Both the soil and surface variable must be present to compute TCI, so we only want # to retain days where both are not missing. - withmiss = len(df) df = df[(df[sfc_flux_varname]!=-9999.) & (df[soil_varname]!=-9999.)] -# if DEBUG: - print("DISCARDED %04d DAYS WITH MISSING DATA." % (int(withmiss)-int(len(df)))) - missdiff = int(withmiss)-int(len(df)) - print("%3.2f" % ((float(missdiff)/float(withmiss))*100.0)) + if DEBUG: + missdiff = int(alldays)-int(len(df)) + print("DISCARDED %04d DAYS WITH MISSING DATA (%3.2f %%)." % (int(alldays)-int(len(df)),((float(missdiff)/float(alldays))*100.0))) + + # Reset length of good data + alldays = len(df) # Only save data with quality above the threshold and reset the index df = df[(df[sfc_qc].astype('float')>=DAILY_QC_THRESH)&(df[soil_qc].astype('float')>=DAILY_QC_THRESH)].reset_index() if DEBUG: print("DISCARDED %04d DAYS OF LOW QUALITY DATA FOR ALL SEASONS AT %s" % (int(alldays)-int(len(df)),stn)) + # Print the number of days remaining after filtering + if DEBUG: + print("NUMBER OF DAYS AFTER FILTERING AT THIS SITE: %04d" % (alldays)) + # Double check there's any valid data here if not len(df) > 0: if DEBUG: @@ -209,6 +215,8 @@ def get_season_start_end(s,refdate): # Subset by the requested season df = df[df['season']==season] + if DEBUG: + print("TOTAL DAYS FOR %s AT THIS SITE: %04d" % (season,len(df))) # Double check there's any valid data here if not len(df) > 0: @@ -219,7 +227,10 @@ def get_season_start_end(s,refdate): # If the season is DJF, remove leap days from February if requested if season=='DJF' and SKIP_LEAP: + withleap = len(df) df = df[~((df.datetime.dt.month == 2) & (df.datetime.dt.day == 29))] + if DEBUG: + print("REMOVED %03d LEAP DAYS." % (int(withleap)-int(len(df)))) # Get the start and end of the season of the first year start, end = get_season_start_end(season,df['datetime'].iloc[0]) From 7fc9b50ecb4827b3e420028f52f4361a5b92d2d0 Mon Sep 17 00:00:00 2001 From: Daniel Adriaansen Date: Mon, 15 Apr 2024 21:10:19 +0000 Subject: [PATCH 30/31] Fixing a few minor code smells from last week. --- .../PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py | 2 +- .../PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py index 0c568ae538..5b69bcd693 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/cesm_tci.py @@ -53,7 +53,7 @@ # Add a Pandas date range to subset by season time_units, reference_date = ds.time.attrs['units'].split('since') -if not time_units.strip() in ['D','days','Days','DAYS']: +if time_units.strip() not in ['D','days','Days','DAYS']: print("ERROR: TIME UNITS EXPECTED TO BE DAYS IN tci_from_cesm.py") sys.exit(1) else: diff --git a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py index c81a8c3c01..23e1c696c0 100644 --- a/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py +++ b/parm/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI/fluxnet2015_tci.py @@ -195,7 +195,7 @@ def get_season_start_end(s,refdate): print("NUMBER OF DAYS AFTER FILTERING AT THIS SITE: %04d" % (alldays)) # Double check there's any valid data here - if not len(df) > 0: + if len(df) <= 0: if DEBUG: print("WARNING! NO DATA LEFT AFTER QC FILTERING.") metdf.loc[metdf['sid']==stn,'qc'] = '-9999' @@ -219,7 +219,7 @@ def get_season_start_end(s,refdate): print("TOTAL DAYS FOR %s AT THIS SITE: %04d" % (season,len(df))) # Double check there's any valid data here - if not len(df) > 0: + if len(df) <= 0: if DEBUG: print("WARNING! NO DATA FOR REQUESTED SEASON.") metdf.loc[metdf['sid']==stn,'qc'] = '-9999' From c28f52d911b63ad3488e7f3cb7993c7f525f406b Mon Sep 17 00:00:00 2001 From: Dan Adriaansen Date: Tue, 16 Apr 2024 10:25:46 -0600 Subject: [PATCH 31/31] Update docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py Co-authored-by: George McCabe <23407799+georgemccabe@users.noreply.github.com> --- .../land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py index 71c35121c9..a7b11c5362 100644 --- a/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py +++ b/docs/use_cases/model_applications/land_surface/PointStat_fcstCESM_obsFLUXNET2015_TCI.py @@ -34,7 +34,7 @@ # # | **Observations:** Raw FLUXNET2015 observations # -# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | **Location:** All of the input data required for this use case can be found in the land_surface sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases # | This tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. # # | **Data Source:** CESM - NSF NCAR Climate & Global Dynamics (CGD); FLUXNET2015 "SUBSET" Data Product: https://fluxnet.org/data/fluxnet2015-dataset/subset-data-product/