From c50f46fe9a63b1ed6da36772f0df2c384f973ba1 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Wed, 27 Sep 2023 15:37:12 -0400 Subject: [PATCH 01/14] Add code to grid.py that returns model data nearest to a (lat,lon) point grid.py - Add routines: - get_nearest_model_data_cs - get_nearest_model_data_ll - get_nearest_model_data - Trimmed trailing whitespace - Changed imports from e.g. "import .util" to "import gcpy.util" - Now import find_index, is_cubed_sphere from gcpy.cstools Signed-off-by: Bob Yantosca --- gcpy/grid.py | 193 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 188 insertions(+), 5 deletions(-) diff --git a/gcpy/grid.py b/gcpy/grid.py index c4d8be5e..3ff7cc7c 100644 --- a/gcpy/grid.py +++ b/gcpy/grid.py @@ -1,10 +1,15 @@ +""" +util.py: Contains several convenience and utility routines. +""" + import xarray as xr import numpy as np import scipy.sparse from itertools import product -from .util import get_shape_of_data -from .grid_stretching_transforms import scs_transform -from .constants import R_EARTH_m +from gcpy.util import get_shape_of_data, verify_variable_type +from gcpy.grid_stretching_transforms import scs_transform +from gcpy.constants import R_EARTH_m +from gcpy.cstools import find_index, is_cubed_sphere def get_troposphere_mask(ds): @@ -165,7 +170,7 @@ def call_make_grid(res, gridtype, in_extent=[-180, 180, -90, 90], Returns: [grid, grid_list]: list(dict, list(dict)) - Returns the created grid. + Returns the created grid. grid_list is a list of grids if gridtype is 'cs', else it is None """ @@ -1121,7 +1126,7 @@ def _initialize(self): pp[:, i, j] = latlon_to_cartesian( lambda_rad[i, j], theta_rad[i, j]) - # Map the edges on the sphere back to the cube. + # Map the edges on the sphere back to the cube. #Note that all intersections are at x = -rsq3 # print("EDGES") for ij in range(1, c + 1): @@ -1453,3 +1458,181 @@ def rotate_sphere_3D(theta, phi, r, rot_ang, rot_axis='x'): theta_new, phi_new, r_new = cartesian_to_spherical(x_new, y_new, z_new) return theta_new, phi_new, r_new + + +def get_nearest_model_data_cs( + gc_data, + gc_cs_grid, + lon_value, + lat_value, + varlist=None + +): + """ + Returns GEOS-Chem model data (on a cubed-sphere grid) at the + grid box closest to a given (lat, lon) location. + + Args: + ----- + gc_data : xarray.DataArray or xarray.Dataset + GEOS-Chem model data for a single variable + + gc_cs_grid: xarray Dataset + Coordinate arrays defining the cubed-sphere grid. + + lon_value : float + Longitude at the location of interest + + lat_value : float + Latitude at the location of interest + + + Keyword Args (optional): + ------------------------ + varlist : list of str + List of data variables to include in the output + + Returns: + -------- + dataframe: pandas.DataFrame + Model data closest to the observation site. + """ + verify_variable_type(gc_data, (xr.DataArray, xr.Dataset)) + verify_variable_type(gc_cs_grid, xr.Dataset) + + # Prevent the latitude from getting too close to the N or S poles + lat_value = max(min(lat_value, 89.75), -89.75) + + # Indices (nf, yInd, xInd) of box nearest to observation site + cs_indices = find_index( + lat_value, + lon_value, + gc_cs_grid + ) + + if varlist is not None: + return gc_data[varlist].isel( + nf=cs_indices[0, 0], + Ydim=cs_indices[1, 0], + Xdim=cs_indices[2, 0], + ).to_dataframe() + + return gc_data.isel( + nf=cs_indices[0, 0], + Ydim=cs_indices[1, 0], + Xdim=cs_indices[2, 0], + ).to_dataframe() + + +def get_nearest_model_data_ll( + gc_data, + lon_value, + lat_value, + varlist=None + +): + """ + Returns GEOS-Chem model data (on a cubed-sphere grid) at the + grid box closest to a given (lat, lon) location. + + Args: + ----- + gc_data : xarray.DataArray or xarray.Dataset + GEOS-Chem model data + + lon_value : float + Longitude at the location of interest + + lat_value : float + Latitude at the location of interest + + Keyword Args (optional): + ------------------------ + varlist : list of str + List of data variables to include in the output + + Returns: + -------- + dataframe: pandas.DataFrame + Model data closest to the observation site. + """ + verify_variable_type(gc_data, (xr.DataArray, xr.Dataset)) + + x_idx=( + np.abs( + gc_data.lon.values - float(lon_value) + ) + ).argmin() + + y_idx=( + np.abs( + gc_data.lat.values - float(lat_value) + ) + ).argmin() + + if varlist is not None: + return gc_data[varlist].isel( + lon=x_idx, + lat=y_idx, + ).to_dataframe() + + return gc_data.isel( + lon=x_idx, + lat=y_idx, + ).to_dataframe() + + +def get_nearest_model_data( + gc_data, + lon_value, + lat_value, + gc_cs_grid=None, + varlist=None +): + """ + Args: + ----- + gc_data : xarray.DataArray or xarray.Dataset + GEOS-Chem data for a single variable + + gc_cs_grid: xarray.Dataset or NoneType + Coordinate arrays defining the cubed-sphere grid. + + lon_value : float + Longitude at the location of interest + + lat_value : float + Latitude at the location of interest + + Keyword Args (optional): + ------------------------ + gc_cs_grid : xarray.Dataset + Datasaet with cubed-sphere grid definition. This can be + obtained as the output of function gcpy.util.extract_data(). + + varlist : list of str + List of data variables to include in the output + + Returns: + -------- + dataframe: pandas.DataFrame + Model data closest to the observation site. + """ + verify_variable_type(gc_data, (xr.DataArray, xr.Dataset)) + verify_variable_type(gc_cs_grid, (xr.Dataset, type(None))) + + if is_cubed_sphere(gc_data): + return get_nearest_model_data_cs( + gc_data, + gc_cs_grid, + lon_value, + lat_value, + varlist=varlist + ) + + return get_nearest_model_data_ll( + gc_data, + lon_value, + lat_value, + varlist=varlist, + ) From ce61fbb0846d286fe90c081468076078aded881e Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Fri, 3 Nov 2023 17:21:58 -0400 Subject: [PATCH 02/14] Update cubed-sphere inquiry functions to work w/ DataArray objects gcpy/cstools.py - In routine "is_cubed_sphere_rst_grid": - Check if len(data.coords["lat"]) == len(data.coords["lon"]) * 6, instead of checking data.dims. In Dataset objects, data.dims is a dict w/ dimension names and sizes, but in DataArray objects, dims is just a tuple of names. - Search for "SPC_" in data.name for DataArray objects, which do not have the data_vars dictionary - In routine "get_cubed_sphere_res": - Return len(data.coords["lon"]) instead of data.dims["lon"] and len(data.coords["Xdim"] instead of data.dims["Xdim"]. This will work if data is either a Dataset or DataArray object. Signed-off-by: Bob Yantosca --- gcpy/cstools.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/gcpy/cstools.py b/gcpy/cstools.py index a1d5dcac..eff3b508 100644 --- a/gcpy/cstools.py +++ b/gcpy/cstools.py @@ -746,11 +746,21 @@ def is_cubed_sphere_rst_grid(data): # internal state variables in GCHP. A more robust back-up check # could be to see if all the lats and lons are integer, since # that will be the case with the GCHP restart file format. - if "lat" in data.dims: - if data.dims["lat"] == data.dims["lon"] * 6: + + # NOTE: in DataArray objects, dims is a tuple but not a dict! + # Comparing the len of the lat & lon coords will work for both. + if "lat" in data.coords: + if len(data.coords["lat"]) == len(data.coords["lon"]) * 6: return True - if "SPC_" in data.data_vars.keys(): - return True + + # Dataset: search data.data_vars for "SPC_" + # DataArray: search data.name for "SPC_" + if isinstance(data, xr.Dataset): + if "SPC_" in data.data_vars.keys(): + return True + if "SPC_" in data.name: + return True + return False @@ -776,9 +786,13 @@ def get_cubed_sphere_res(data): if not is_cubed_sphere(data): return 0 + + # NOTE: In Dataset objects "dims" is a dict, but in DataArray + # objects "dims" is a tuple. Returning the length of the + # corresponding coords array should work in both cases. if is_cubed_sphere_rst_grid(data): - return data.dims["lon"] - return data.dims["Xdim"] + return len(data.coords["lon"]) + return len(data.coords["Xdim"]) def is_gchp_lev_positive_down(data): From 263257f0c3a932156f6cd94c5cf80ae9a22e68e2 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 6 Nov 2023 16:51:50 -0500 Subject: [PATCH 03/14] Simplify logic in cubed-sphere inquiry functions gcpy/cstools.py - Reduce the number of return statements by returning a boolean expression. This is more in line with what Pylint would suggest. Signed-off-by: Bob Yantosca --- gcpy/cstools.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/gcpy/cstools.py b/gcpy/cstools.py index eff3b508..4bc305ca 100644 --- a/gcpy/cstools.py +++ b/gcpy/cstools.py @@ -720,9 +720,7 @@ def is_cubed_sphere_diag_grid(data): True if the grid has History diagnostic dimensions, False otherwise. """ - if "nf" in data.dims: - return True - return False + return "nf" in data.dims def is_cubed_sphere_rst_grid(data): @@ -750,18 +748,13 @@ def is_cubed_sphere_rst_grid(data): # NOTE: in DataArray objects, dims is a tuple but not a dict! # Comparing the len of the lat & lon coords will work for both. if "lat" in data.coords: - if len(data.coords["lat"]) == len(data.coords["lon"]) * 6: - return True + return len(data.coords["lat"]) == len(data.coords["lon"]) * 6 # Dataset: search data.data_vars for "SPC_" # DataArray: search data.name for "SPC_" if isinstance(data, xr.Dataset): - if "SPC_" in data.data_vars.keys(): - return True - if "SPC_" in data.name: - return True - - return False + return "SPC_" in data.data_vars.keys() + return "SPC_" in data.name def get_cubed_sphere_res(data): From 2f1f311c035da5b2436e26a3fddce2d3eb1785f2 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 6 Nov 2023 16:55:27 -0500 Subject: [PATCH 04/14] Bug fix in plot_single_panel example script gcpy/examples/plotting/plot_single_panel.py - Fixed a "cut-and-paste" error. Removed calls to routine rename_and_flip_gchp_rst_vars for ref_ds and dev_ds, and added call for dset (which is correct). Signed-off-by: Bob Yantosca --- gcpy/examples/plotting/plot_single_panel.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gcpy/examples/plotting/plot_single_panel.py b/gcpy/examples/plotting/plot_single_panel.py index e9c3a8ba..3159c26d 100755 --- a/gcpy/examples/plotting/plot_single_panel.py +++ b/gcpy/examples/plotting/plot_single_panel.py @@ -40,8 +40,7 @@ def plot_single_panel(infile, varname, level): # If the data is from a GCHP restart file, rename variables and # flip levels to match the GEOS-Chem Classic naming and level # conventions. Otherwise no changes will be made. - ref_ds = rename_and_flip_gchp_rst_vars(ref_ds) - dev_ds = rename_and_flip_gchp_rst_vars(dev_ds) + dset = rename_and_flip_gchp_rst_vars(dset) # You can easily view the variables available for plotting # using xarray. Each of these variables has its own xarray @@ -123,7 +122,7 @@ def plot_single_panel(infile, varname, level): log_yaxis=True, log_color_scale=True, plot_type="zonal_mean", - title=f"Zonal mean plot for {varname}, stratopshere-only" + title=f"Zonal mean plot for {varname}, stratosphere-only" ) plt.show() From 029af495baeae62fce64a2bd20aee7414a05173c Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 4 Dec 2023 18:00:48 -0500 Subject: [PATCH 05/14] Now use get_nearest_model_obs in model vs. obs plots gcpy/benchmark/modules/benchmark_models_vs_obs.py - Remove imports for find_index, is_cubed_sphere functions - Add import for get_nearest_model_data from gcpy.grid - Updated Pydoc headers, make them more compact - Replace functions get_nearest_model_data_to_obs_cs and get_nearest_model_data_to_obs_ll with get_nearest_model_data_to_obs. This uses the common functiong get_nearest_model_data from grid.py - Rename gc_level_alts_m to gc_levels; also add "Altitude (m)" column - Remove function "which_finder_function", it's not needed - Remove **kwargs argument from calls to "prepare_data_for_plot" and "call_single_station" - Update label of observations to "Surface O3 (EBAS, 2019)" for clarity - Add code updates suggested by Pylint Signed-off-by: Bob Yantosca --- CHANGELOG.md | 4 + .../modules/benchmark_models_vs_obs.py | 311 +++++------------- 2 files changed, 90 insertions(+), 225 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 681badee..566e1749 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to GCPy will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] - TBD +### Changed +- Script `benchmark_model_vs_obs.py` now uses grid inquiry functions from `grid.py` to return data nearest to a (lat,lon) location + ## [1.4.0] - 2023-11-20 ### Added - Added C2H2 and C2H4 to `emission_species.yml` diff --git a/gcpy/benchmark/modules/benchmark_models_vs_obs.py b/gcpy/benchmark/modules/benchmark_models_vs_obs.py index 06b0b07a..d14bd7ab 100644 --- a/gcpy/benchmark/modules/benchmark_models_vs_obs.py +++ b/gcpy/benchmark/modules/benchmark_models_vs_obs.py @@ -23,7 +23,8 @@ import xarray as xr from gcpy.constants import skip_these_vars from gcpy.util import verify_variable_type, dataset_reader, make_directory -from gcpy.cstools import extract_grid, find_index, is_cubed_sphere +from gcpy.cstools import extract_grid +from gcpy.grid import get_nearest_model_data def read_nas( input_file, @@ -39,7 +40,6 @@ def read_nas( ----- input_file : str Path to data file with observational data (e.g. sonde data). - Keyword Args: ------------- verbose : bool @@ -48,9 +48,8 @@ def read_nas( Returns: -------- - obs_dataframe : pandas DataFrame + obs_dataframe : pandas.DataFrame Dataframe containing observational data from input_file. - obs_site_coords : dict Dictionary containing formatted site name: lon, lat and altitude. """ @@ -146,17 +145,15 @@ def read_observational_data( ----- path : str Path to the observational data directory - verbose : bool Toggles verbose printout on (True) or off (False). Default value: False Returns: -------- - obs_dataframe : pandas DataFrame + obs_dataframe : pandas.DataFrame DataFrame object with the observational data (i.e. station names, data, metadata). - obs_site_coords : dict Dictionary with coordinates of each observation site """ @@ -208,7 +205,6 @@ def read_model_data( ----- filepaths : list of str List of data files to read. - varname : str or list of str Variable name(s) to read from data files. @@ -220,7 +216,7 @@ def read_model_data( Returns: -------- - dataarray : xarray DataArray + dataarray : xarray.DataArray DataArray object containing data read from files specified by the filepaths argument. """ @@ -279,22 +275,20 @@ def find_times( ): """ Convert timestamps in nasa ames data files to python datetime - objects Set DataFrame index to the new datetime array + objects and set DataFrame index to the new datetime array. Args: ---------- - obs_dataframe : pandas DataFrame + obs_dataframe : pandas.DataFrame DataFrame with O3 values from GAW site - start_time : str Reference start time for timestamp taken from nasa ames file Returns ------ - obs_dataframe: pandas DataFrame + obs_dataframe: pandas.DataFrame O3 in ppbV with datetime index - - qcflag : pandas Dataframe + qcflag : pandas.Dataframe QC flag with datetime index """ end_time = obs_dataframe[obs_dataframe.columns[1]] @@ -310,13 +304,14 @@ def find_times( return obs_dataframe, qcflag -def get_nearest_model_data_to_obs_cs( +def get_nearest_model_data_to_obs( gc_data, - gc_cs_grid, - gc_level_alts_m, + gc_levels, lon_value, lat_value, - alt_value + alt_value, + gc_cs_grid=None + ): """ Returns GEOS-Chem model data (on a cubed-sphere grid) at the @@ -324,26 +319,22 @@ def get_nearest_model_data_to_obs_cs( Args: ----- - gc_data : xarray DataArray + gc_data : xarray.DataSet GEOS-Chem output to be processed - - gc_cs_grid: xarray Dataset - Coordinate arrays defining the cubed-sphere grid. - - gc_level_alts_m: pandas Series + gc_levels: pandas.DataFrame Altitudes of GEOS-Chem levels in meters - lon_value : float GAW site longitude - lat_value : float GAW site latitude - alt_value : float GAW site altitude - Keyword Args: - ------------- + Keyword Args: (Optional) + ------------------------ + gc_cs_grid : xarray.Dataset or NoneType + Dictionary containing the cubed-sphere grid definition + (or None if gc_data is not placed on a cubed-sphere grid). Returns: -------- @@ -351,128 +342,32 @@ def get_nearest_model_data_to_obs_cs( Model data closest to the observation site. """ verify_variable_type(gc_data, xr.DataArray) - verify_variable_type(gc_cs_grid, xr.Dataset) - verify_variable_type(gc_level_alts_m, pd.Series) + verify_variable_type(gc_cs_grid, (xr.Dataset, type(None))) + verify_variable_type(gc_levels, pd.DataFrame) # Prevent the latitude from getting too close to the N or S poles lat_value = max(min(lat_value, 89.75), -89.75) - # Indices (nf, yInd, xInd) of box nearest to observation site - cs_indices = find_index( - lat_value, - lon_value, - gc_cs_grid - ) - - # Index of nearest vertical levle to observation site - z_idx=( - np.abs( - gc_level_alts_m.values - float(alt_value) - ) - ).argmin() - - return gc_data.isel( - nf=cs_indices[0, 0], - Ydim=cs_indices[1, 0], - Xdim=cs_indices[2, 0], - lev=z_idx - ).to_dataframe() - - -def get_nearest_model_data_to_obs_ll( + # Nearest GEOS-Chem data to (lat, lon) of observation + dframe = get_nearest_model_data( gc_data, - gc_cs_grid, - gc_level_alts_m, lon_value, lat_value, - alt_value, - -): - """ - Returns GEOS-Chem model data (on a cubed-sphere grid) at the - grid box closest to an observation site location. - - Args: - ----- - gc_data : xarray DataSet - GEOS-Chem output to be processed - - gc_cs_grid : NoneType - Dummy variable (needed to make the argument list the - same as in get_nearest_model_data_to_obs_ll). - - gc_level_alts_m: pandas Series - Altitudes of GEOS-Chem levels in meters - - lon_value : float - GAW site longitude - - lat_value : float - GAW site latitude - - alt_value : float - GAW site altitude - - Returns: - -------- - dataframe: pandas.DataFrame - Model data closest to the observation site. - """ - verify_variable_type(gc_data, xr.DataArray) - verify_variable_type(gc_cs_grid, type(None)) - verify_variable_type(gc_level_alts_m, pd.Series) - - x_idx=( - np.abs( - gc_data.lon.values - float(lon_value) - ) - ).argmin() - - y_idx=( - np.abs( - gc_data.lat.values - float(lat_value) - ) - ).argmin() - - z_idx=( - np.abs( - gc_level_alts_m.values - float(alt_value) - ) - ).argmin() - - return gc_data.isel( - lon=x_idx, - lat=y_idx, - lev=z_idx - ).to_dataframe() - - -def which_finder_function( - data -): - """ - Returns the function that will be used to get the model data nearest - to the observation site. The function that is returned depends on - whether the model grid is lat-lon or cubed-sphere, as different - handling needs to be applied to each grid - - Args: - ----- - data : xarray.DataArray - Model data + gc_cs_grid=gc_cs_grid + ) + dframe = dframe.reset_index() - Returns: - -------- - A reference to the function that will read the data, depending - on whether the data is placed on a cubed-sphere grid or on - a lat-lon grid. - """ - verify_variable_type(data, (xr.DataArray, xr.Dataset)) + # Nearest GEOS-Chem level to observation + gc_alts = gc_levels["Altitude (m)"].values + n_alts = len(gc_alts) + z_idx =(np.abs(gc_alts - float(alt_value))).argmin() - if is_cubed_sphere(data): - return get_nearest_model_data_to_obs_cs + # Pick out elements of the dataframe at the nearest level to obs + rows = np.zeros(len(dframe), dtype=bool) + good = [(v * n_alts) + z_idx for v in range(12)] + rows[good] = True - return get_nearest_model_data_to_obs_ll + return dframe[rows].set_index("time") def get_geoschem_level_metadata( @@ -482,7 +377,7 @@ def get_geoschem_level_metadata( ): """ Reads a comma-separated variable (.csv) file with GEOS-Chem vertical - level metadata and returns it in a pandas DataFrame object. + level metadata and returns it in a pandas.DataFrame object. Args: ----- @@ -502,7 +397,7 @@ def get_geoschem_level_metadata( Returns: -------- - metadata : pandas DataFrame + metadata : pandas.DataFrame Metadata for each of the GEOS-Chem vertical levels. """ if filename is None: @@ -532,9 +427,8 @@ def prepare_data_for_plot( ref_cs_grid, dev_dataarray, dev_cs_grid, - gc_level_alts_m, + gc_levels, varname="SpeciesConcVV_O3", - **kwargs, ): """ Prepares data for passing to routine plot_single_frames as follows: @@ -546,23 +440,18 @@ def prepare_data_for_plot( Args: ----- - obs_dataframe : pandas DataFrame + obs_dataframe : pandas.DataFrame Observations at each station site. - obs_site_coords : dict Coordinates (lon, lat, alt) for each observation station site. - obs_site_name : str Name of the observation station site. - - ref_dataarray, dev_dataarray : xarray DataArray + ref_dataarray, dev_dataarray : xarray.DataArray Data from the Ref and Dev model versions. - ref_cs_grid, dev_cs_grid : xarray.Dataset or NoneType Dictionary containing the cubed-sphere grid definitions for ref_dataarray and dev_dataarray (or None if ref_dataarray or dev_dataarray are not placed on a cubed-sphere grid). - gc_level_alts_m : pandas Series Metadata pertaining to GEOS-Chem vertical levels @@ -574,16 +463,13 @@ def prepare_data_for_plot( Returns: -------- - obs_dataframe : pandas DataFrame + obs_dataframe : pandas.DataFrame Meanb observational data at the given station site. - ref_series, dev_series : pandas Series Data from the Ref and Dev model versions at the closest grid box to the observation station site. - subplot_title : str Plot title string for the given observation station site. - subplot_ylabel : str Label for the Y-axis (e.g. species name). """ @@ -594,29 +480,34 @@ def prepare_data_for_plot( verify_variable_type(dev_dataarray, xr.DataArray) verify_variable_type(ref_cs_grid, (xr.Dataset, type(None))) verify_variable_type(dev_cs_grid, (xr.Dataset, type(None))) - verify_variable_type(gc_level_alts_m, pd.Series) + verify_variable_type(gc_levels, pd.DataFrame) verify_variable_type(varname, str) # Get data from the Ref model closest to the data site - finder_function = which_finder_function(ref_dataarray) - ref_dataframe = finder_function( + coords = [ + round(obs_site_coords[obs_site_name]['lon'], 2), + round(obs_site_coords[obs_site_name]['lat'], 2), + round(obs_site_coords[obs_site_name]['alt'], 1) + ] + + # Get data from the Ref model closest to the obs site + ref_dataframe = get_nearest_model_data_to_obs( ref_dataarray, - ref_cs_grid, - gc_level_alts_m, - lon_value=round(obs_site_coords[obs_site_name]['lon'], 2), - lat_value=round(obs_site_coords[obs_site_name]['lat'], 2), - alt_value=round(obs_site_coords[obs_site_name]['alt'], 1) + gc_levels, + coords[0], # Obs site lon + coords[1], # Obs site lat + coords[2], # Obs site alt + gc_cs_grid=ref_cs_grid ) # Get data from the Dev model closest to the obs site - finder_function = which_finder_function(dev_dataarray) - dev_dataframe = finder_function( + dev_dataframe = get_nearest_model_data_to_obs( dev_dataarray, - dev_cs_grid, - gc_level_alts_m, - lon_value=round(obs_site_coords[obs_site_name]['lon'], 2), - lat_value=round(obs_site_coords[obs_site_name]['lat'], 2), - alt_value=round(obs_site_coords[obs_site_name]['alt'], 1) + gc_levels, + coords[0], # Obs site lon + coords[1], # Obs site lat + coords[2], # Obs site alt + gc_cs_grid=dev_cs_grid, ) # Take the monthly mean of observations for plotting @@ -657,8 +548,7 @@ def plot_single_station( ref_series, ref_label, dev_series, - dev_label, - **kwargs + dev_label ): """ Plots observation data vs. model data at a single station site. @@ -667,27 +557,20 @@ def plot_single_station( ----- fig : matplotlib.figure.Figure Matplotlib Figure object containing the plot. - rows_per_page, cols_per_page : int Number of rows and columns on each page of the plot. - subplot_index : int Index of the subplot on the page. Runs from 0 to (cols_per_page * rows_per_page - 1). - subplot_title, subplot_ylabel : str Top title and y-axis label for each subplot - - obs_dataframe : pandas DataFrame + obs_dataframe : pandas.DataFrame Observational data. - obs_site_name: : str Name of the observation station site. - ref_series, dev_series : pandas Series GEOS-Chem data at closest grid box to the observation station site for the Ref and Dev model versions. - ref_label, dev_label : str Descriptive labels (e.g. version numbers) for the GEOS-Chem Ref and Dev model versions. @@ -727,7 +610,7 @@ def plot_single_station( marker='^', markersize=4, lw=1, - label='Observations' + label='Surface O3 (EBAS, 2019)' ) # Plot model data @@ -793,7 +676,7 @@ def plot_one_page( dev_dataarray, dev_label, dev_cs_grid, - gc_level_alts_m, + gc_levels, rows_per_page=3, cols_per_page=3, varname="SpeciesConcVV_O3", @@ -804,40 +687,31 @@ def plot_one_page( Args: ----- - obs_dataframe : pandas DataFrame + obs_dataframe : pandas.DataFrame Observations at each station site. - obs_site_coords : dict Coordinates (lon, lat, alt) for each observation station site. - obs_site_names : list of str Names of observation station sites that fit onto a single page. - - ref_dataarray, dev_dataarray : xarray DataArray + ref_dataarray, dev_dataarray : xarray.DataArray Data from the Ref and Dev model versions. - ref_label, dev_label: str Labels describing the Ref and Dev datasets (e.g. version numbers) - ref_cs_grid, dev_cs_grid : xarray.Dataset or NoneType Dictionary containing the cubed-sphere grid definitions for ref_dataarray and dev_dataarray (or None if ref_dataarray or dev_dataarray are not placed on a cubed-sphere grid). - - gc_level_alts_m : pandas DataFrame + gc_levels : pandas.DataFrame Metadata pertaining to GEOS-Chem vertical levels Keyword Args: ------------- - rows_per_page, cols_per_page : int Number of rows and columns to plot on a single page. Default values: 3 rows, 3 columns - varname : str Variable name for GEOS-Chem diagnostic data. Default value: "SpeciesConcVV_O3" - verbose : bool Toggles verbose printout on (True) or off (False). Default value: False @@ -851,7 +725,7 @@ def plot_one_page( verify_variable_type(dev_dataarray, xr.DataArray) verify_variable_type(dev_label, str) verify_variable_type(dev_cs_grid, (xr.Dataset, type(None))) - verify_variable_type(gc_level_alts_m, pd.Series) + verify_variable_type(gc_levels, pd.DataFrame) # Define a new matplotlib.figure.Figure object for this page # Landscape width: 11" x 8" @@ -874,9 +748,8 @@ def plot_one_page( ref_cs_grid, # dict or none dev_dataarray, # xarray.DataArray dev_cs_grid, # dict or none - gc_level_alts_m, # pandas.Series - varname=varname, # str - **kwargs + gc_levels, # pandas.DataFrame + varname=varname # str ) # Plot models vs. observation for a single station site @@ -893,7 +766,6 @@ def plot_one_page( ref_label, # str dev_series, # pandas.Series dev_label, # str - **kwargs ) # Add extra spacing around plots @@ -921,7 +793,7 @@ def plot_models_vs_obs( ref_label, dev_dataarray, dev_label, - gc_level_alts_m, + gc_levels, varname="SpeciesConcVV_O3", dst="./benchmark", **kwargs @@ -931,19 +803,15 @@ def plot_models_vs_obs( Args: ----- - obs_dataframe : pandas DataFrame + obs_dataframe : pandas.DataFrame Observations at each station site. - obs_site_coords : dict Coordinates (lon, lat, alt) for each observation station site. - - ref_dataarray, dev_dataarray : xarray DataArray + ref_dataarray, dev_dataarray : xarray.DataArray Data from the Ref and Dev model versions. - ref_label, dev_label: str Labels describing the Ref and Dev datasets (e.g. version numbers) - - gc_level_alts_m : pandas DataFrame + gc_levels : pandas.DataFrame Metadata pertaining to GEOS-Chem vertical levels Keyword Args: @@ -951,11 +819,9 @@ def plot_models_vs_obs( varname : str Variable name for GEOS-Chem diagnostic data. Default value: "SpeciesConcVV_O3" - dst : str Root folder where output will be created. Default value: "./benchmark" - verbose : bool Toggles verbose printout on (True) or off (False). Default value: False @@ -966,7 +832,7 @@ def plot_models_vs_obs( verify_variable_type(ref_label, str) verify_variable_type(dev_dataarray, xr.DataArray) verify_variable_type(dev_label, str) - verify_variable_type(gc_level_alts_m, pd.Series) + verify_variable_type(gc_levels, pd.DataFrame) # Get the cubed-sphere grid definitions for Ref & Dev # (will be returned as "None" for lat/lon grids) @@ -974,7 +840,7 @@ def plot_models_vs_obs( dev_cs_grid = extract_grid(dev_dataarray) # Figure setup - plt.style.use('seaborn-darkgrid') + plt.style.use('seaborn-v0_8-darkgrid') rows_per_page = 3 cols_per_page = 3 plots_per_page = rows_per_page * cols_per_page @@ -1011,7 +877,7 @@ def plot_models_vs_obs( dev_dataarray, # xarray.DataArray dev_label, # str dev_cs_grid, # xarray.Dataset or NoneType - gc_level_alts_m, # pandas.Series + gc_levels, # pandas.DataFrame rows_per_page=rows_per_page, # int cols_per_page=cols_per_page, # int varname=varname, # str @@ -1040,10 +906,8 @@ def make_benchmark_models_vs_obs_plots( ----- obs_filepaths : str or list Path(s) to the observational data. - ref_filepaths, dev_filepaths: str or list Path(s) to the Ref and Dev model versions to be compared. - ref_label, dev_label : str Descriptive labels (e.g. for version numbers) for the Ref and Dev model data. @@ -1053,14 +917,11 @@ def make_benchmark_models_vs_obs_plots( varname : str Variable name for model data to be plotted against observations. Default value: "SpeciesConcVV_O3". - dst : str Path to the root folder where plots will be created. - verbose : bool Toggles verbose printout on (True) or off (False). Default value: False - overwrite : bool Toggles whether plots should be overwritten (True) or not (False). Default value: True @@ -1077,11 +938,11 @@ def make_benchmark_models_vs_obs_plots( overwrite=overwrite ) - # Get altitude [m] of GEOS-Chem level edges - gc_level_alts_m = \ - get_geoschem_level_metadata( - search_key="Altitude (km)" - ) * 1.0e3 + # Get GEOS-Chem level metadata + gc_levels = get_geoschem_level_metadata( + search_key=["Altitude (km)", "Eta Mid"] + ) + gc_levels["Altitude (m)"] = gc_levels["Altitude (km)"] * 1000.0 # Read the observational data obs_dataframe, obs_site_coords = read_observational_data( @@ -1107,7 +968,7 @@ def make_benchmark_models_vs_obs_plots( ref_label, # str dev_dataarray, # xarray.DataArray dev_label, # str - gc_level_alts_m, # pandas.Series + gc_levels, # pandas.DataFrame varname=varname, # str dst=dst, # str verbose=verbose # bool From 322750045cb8e7e767346b25c89040ca8b00164a Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 6 Feb 2024 12:04:58 -0500 Subject: [PATCH 06/14] Move get_geoschem_level_metadata to benchmark_utils script gcpy/benchmark/modules/benchmark_utils.py - Moved get_geoschem_level_metadata here - Also need to import pandas as pd gcpy/benchmark/modules/benchmark_model_vs_obs.py - Now import get_geoschem_level_metadata from benchmark_utils CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../modules/benchmark_models_vs_obs.py | 51 +------------------ gcpy/benchmark/modules/benchmark_utils.py | 50 ++++++++++++++++++ 3 files changed, 53 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3b42387..a569fa5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - `requirements.txt` links to `docs/environment_files/requirements.txt` - Python packages for RTD documenation builds from `docs/environment_files/environment.yml` - Script `benchmark_model_vs_obs.py` now uses grid inquiry functions from `grid.py` to return data nearest to a (lat,lon) location +- Moved routine `get_geoschem_level_metadata` to `gcpy/benchmark/modules/benchmark_utils.py` ### Fixed - CS inquiry functions in `gcpy/cstools.py` now work properly for `xr.Dataset` and `xr.DataArray` objects diff --git a/gcpy/benchmark/modules/benchmark_models_vs_obs.py b/gcpy/benchmark/modules/benchmark_models_vs_obs.py index 9ef65250..d7969c72 100644 --- a/gcpy/benchmark/modules/benchmark_models_vs_obs.py +++ b/gcpy/benchmark/modules/benchmark_models_vs_obs.py @@ -25,6 +25,8 @@ from gcpy.util import verify_variable_type, dataset_reader, make_directory from gcpy.cstools import extract_grid from gcpy.grid import get_nearest_model_data +from gcpy.benchmark.modules.benchmark_utils import get_geoschem_level_metadata + def read_nas( input_file, @@ -370,55 +372,6 @@ def get_nearest_model_data_to_obs( return dframe[rows].set_index("time") -def get_geoschem_level_metadata( - filename=None, - search_key=None, - verbose=False, -): - """ - Reads a comma-separated variable (.csv) file with GEOS-Chem vertical - level metadata and returns it in a pandas.DataFrame object. - - Args: - ----- - filename : str - Name of the comma-separated variable to read. - Default value: "__file__/GC_72_vertical_levels.csv" - - Keyword Args: - ------------- - search_key : str - If present, will return metadata that matches this value. - Default: None - - verbose : bool - Toggles verbose printout on (True) or off (False). - Default value: True - - Returns: - -------- - metadata : pandas.DataFrame - Metadata for each of the GEOS-Chem vertical levels. - """ - if filename is None: - filename = os.path.join( - os.path.dirname(__file__), - "GC_72_vertical_levels.csv" - ) - - try: - if verbose: - print(f"get_geoschem_level_metadata: Reading {filename}") - metadata = pd.read_csv(filename) - except (IOError, OSError, FileNotFoundError) as exc: - msg = f"Could not read GEOS-Chem level metadata in {filename}!" - raise exc(msg) from exc - - if search_key is None: - return metadata - return metadata[search_key] - - def prepare_data_for_plot( obs_dataframe, obs_site_coords, diff --git a/gcpy/benchmark/modules/benchmark_utils.py b/gcpy/benchmark/modules/benchmark_utils.py index 82eeeaad..27778bd4 100644 --- a/gcpy/benchmark/modules/benchmark_utils.py +++ b/gcpy/benchmark/modules/benchmark_utils.py @@ -4,6 +4,7 @@ """ import os import numpy as np +import pandas as pd from gcpy import util from gcpy.constants import skip_these_vars @@ -263,3 +264,52 @@ def print_benchmark_info( print(" - GCHP vs GCHP") if conf["gchp_vs_gcc_diff_of_diffs"]["run"]: print(" - GCHP vs GCC diff of diffs") + + +def get_geoschem_level_metadata( + filename=None, + search_key=None, + verbose=False, +): + """ + Reads a comma-separated variable (.csv) file with GEOS-Chem vertical + level metadata and returns it in a pandas.DataFrame object. + + Args: + ----- + filename : str + Name of the comma-separated variable to read. + Default value: "__file__/GC_72_vertical_levels.csv" + + Keyword Args: + ------------- + search_key : str + If present, will return metadata that matches this value. + Default: None + + verbose : bool + Toggles verbose printout on (True) or off (False). + Default value: True + + Returns: + -------- + metadata : pandas.DataFrame + Metadata for each of the GEOS-Chem vertical levels. + """ + if filename is None: + filename = os.path.join( + os.path.dirname(__file__), + "GC_72_vertical_levels.csv" + ) + + try: + if verbose: + print(f"get_geoschem_level_metadata: Reading {filename}") + metadata = pd.read_csv(filename) + except (IOError, OSError, FileNotFoundError) as exc: + msg = f"Could not read GEOS-Chem level metadata in {filename}!" + raise exc(msg) from exc + + if search_key is None: + return metadata + return metadata[search_key] From 5c3c59fd09a5755cce46008076d05c23a9636770 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 6 Feb 2024 17:34:40 -0500 Subject: [PATCH 07/14] Add benchmark_models_vs_ozonesondes.py benchmarking script gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py - New script to plot model vs ozonesonde output for 1-yr benchmarks. Further work is still needed and will be done in subsequent commits. CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + .../benchmark_models_vs_ozonesondes.py | 429 ++++++++++++++++++ 2 files changed, 430 insertions(+) create mode 100644 gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a569fa5a..4da0d5cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Script `./release/changeVersionNumbers.sh`, used to update version numbers in various files before release - Mamba/Conda enviroment file `docs/environment_files/read_the_docs_environment.yml`, for building ReadTheDocs documentation - Environment files `docs/environment_files/gcpy_requirements.txt` and `docs/environment_files/read_the_docs_requirements.txt` +- New benchmark script `gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py` ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) diff --git a/gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py b/gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py new file mode 100644 index 00000000..aeb3714e --- /dev/null +++ b/gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py @@ -0,0 +1,429 @@ +#!/usr/bin/env python +# coding: utf-8 + +"""Plots ozone sonde data vs. GEOS-Chem data for 1-yr benchmarks""" + +import os +import numpy as np +import pandas as pd +import xarray as xr +from matplotlib.backends.backend_pdf import PdfPages +import matplotlib.pyplot as plt +from gcpy.grid import get_nearest_model_data +from gcpy.util import verify_variable_type +from gcpy.benchmark.modules.benchmark_utils import get_geoschem_level_metadata + + +def get_model_ozone_data(file_path): + """ + Returns model ozone data as a Pandas Dataframe object + + Args: + ----- + file_path (str) : Path to the data files + + Returns: + -------- + data (xr.DataArray) : Ozone data [ppbv] + """ + verify_variable_type(file_path, str) + + # Read data (if path has wildcard, will read multiple data files) + data = xr.open_mfdataset(file_path) + + # Convert from v/v to ppb + data *= 1.0e9 + + # Only pull out the Ozone data + if "SpeciesConcVV_O3" in data.data_vars: + return data["SpeciesConcVV_O3"] + if "SpeciesConc_O3" in data.data_vars: + return data["SpeciesConc_O3"] + + +def get_ref_and_dev_model_data( + ref_dir, + dev_dir, + year="2019" +): + """ + Returns the GEOS-Chem model data for Ref & Dev versions + + Args: + ----- + ref_dir (str) : Path to the Ref model SpeciesConc files + dev_dir (str) : Path to the Dev model SpeciesConc files + + Keyword Args (optional): + ------------------------ + year (str) : Benchmark year (default: 2019) + + Returns: + -------- + data_ref, data_dev (xr.DataArray) : Ref & Dev Ozone data + """ + verify_variable_type(ref_dir, str) + verify_variable_type(dev_dir, str) + + # Get the model data + data_ref = get_model_ozone_data( + f"{ref_dir}/GEOSChem.SpeciesConc.{year}*.nc4" + ) + data_dev = get_model_ozone_data( + f"{dev_dir}/GEOSChem.SpeciesConc.{year}*.nc4" + ) + + return data_ref, data_dev + + +def get_obs_ozone_data(file_path): + """ + Returns the ozone sonde observations as a pandas DataFrame object. + + Args: + ----- + file_path (str) : Path to ozone sonde file + + Returns: + -------- + data (pd.DataFrame) : Ozonesonde observations at various sites (ppbv) + """ + verify_variable_type(file_path, str) + + data = pd.read_csv(file_path, delimiter=',', index_col=0) + data = data.rename(columns={"Latitude": "lat", "Longitude": "lon"}) + + return data + + +def get_seasonal_means(obs, ref, dev, months): + """ + Returns seasonally averaged data for the observations + and Ref and Dev models. Also returns the data + """ + verify_variable_type(obs, pd.DataFrame) + verify_variable_type(ref, pd.DataFrame) + verify_variable_type(dev, pd.DataFrame) + verify_variable_type(months, list) + + # Filter data for season + seasonal_obs = obs[(obs['month'].isin(months))] + seasonal_ref = ref[(ref["month"].isin(months))] + seasonal_dev = dev[(dev["month"].isin(months))] + + # Take the seasonal means + mean_obs = seasonal_obs.groupby('pressure')['o3_ppb'].mean() + std_obs = seasonal_obs.groupby('pressure')['o3_ppb_sd'].mean() + mean_ref = seasonal_ref.groupby('pressure')['o3_ppb'].mean() + mean_dev = seasonal_dev.groupby('pressure')['o3_ppb'].mean() + + return mean_obs, std_obs, mean_ref, mean_dev + + +def get_nearest_model_data_to_obs( + gc_data, + lon, + lat, + gc_pressure, + gc_cs_grid=None +): + """ + Returns the nearest GEOS-Chem data to an observation. Also + inserts the GEOS-Chem pressure levels into the dataset. + """ + verify_variable_type(gc_data, (xr.Dataset, xr.DataArray)) + verify_variable_type(gc_pressure, np.ndarray) + verify_variable_type(gc_cs_grid, (xr.Dataset, type(None))) + + # Nearest data to the observation (pd.DataFrame) + data = get_nearest_model_data(gc_data, lon, lat, gc_cs_grid).reset_index() + + # Add "Pressure" and "month" columns + data.insert(4, "pressure", gc_pressure) + data["month"] = data["time"].dt.month + + # Rename columns to be consistent w/ obs data + data = data.rename( + columns={"SpeciesConcVV_O3": "o3_ppb", "SpeciesConc_O3": "o3_ppb"} + ) + + return data + + +def plot_one_site( + axes_subplot, + season, + season_idx, + site_idx, + mean_obs, + std_obs, + gc_ref_label, + mean_ref, + gc_dev_label, + mean_dev +): + """ + """ + + # Plotting observations with error bars + axes_subplot.errorbar( + mean_obs, + mean_obs.index, + xerr=std_obs, + fmt='-o', + color='black', + markersize=3, + label='Observations' + ) + + # Plotting model data + axes_subplot.plot( + mean_ref, + mean_ref.index, + color='r', + marker='o', + markersize=3, + lw=1, + label=gc_ref_label + ) + axes_subplot.plot( + mean_dev, + mean_dev.index, + color='g', + marker='o', + markersize=3, + lw=1, + label=gc_dev_label, + ) + + + # Inverting y-axis and setting y-axis to start from 1000 hPa + axes_subplot.invert_yaxis() + axes_subplot.set_ylim(1000, 50) + + # Setting x-axis scale to 0-160 ppb + axes_subplot.set_xlim(0, 160) + + # X/Y ticks + axes_subplot.tick_params(axis='both', labelsize=12) + + # Get current x-axis tick values + xticks = axes_subplot.get_xticks() + + # Exclude '0' from the x-axis tick labels + xtick_labels = [f'{tick:.0f}' if tick != 0 else '' for tick in xticks] + + # Set modified tick labels + axes_subplot.set_xticks(xticks) + axes_subplot.set_xticklabels(xtick_labels) + + # Add a legend in the fourth column and first row + if season_idx == 1 and site_idx == 4: + axes_subplot.legend( + loc='lower right', + fontsize='large' + ) + + # Adding latitude and season as text inside the plot + if site_idx == 1: + axes_subplot.text( + 0.05, + 0.8, + f'{season}', + transform=axes_subplot.transAxes, + fontsize=20, + verticalalignment='top', + ) + +def page_adjustments(fig): + """ + """ + + # Eliminating space between subplots completely + plt.subplots_adjust(wspace=0, hspace=0) + + # Adjust subplot layout + plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05) + + # Setting common labels + fig.text( + 0.5, + 0.01, + 'Ozone Concentration (ppb)', + ha='center', + fontsize=20 + ) + fig.text( + 0.0, + 0.5, + 'Pressure (hPa)', + va='center', + rotation='vertical', + fontsize=20 + ) + + #plt.tight_layout() + + +def plot_the_data( + obs_data, + gc_ref_label, + gc_ref_data, + gc_dev_label, + gc_dev_data, + gc_pressure, + pdf_path, + gc_cs_grid_ref=None, + gc_cs_grid_dev=None, +): + """ + Creates plots of model data vs. ozonesonde data + + Args: + ----- + obs_data (pd.DataFrame) : Observational data + gc_ref_label (str ) : Label for Ref model data + gc_ref_data (xr.DataArray) : Ref model data + gc_dev_label (str ) : Label for Dev model data + data_dev (xr.DataArray) : Dev model data + gc_pressure (np.ndarray ) : GC pressures (hPa), tiled x 12 months + pdf_path (str ) : Path to PDF file that will be created + """ + + # Define seasons + seasons = { + 'DJF': [12, 1, 2], + 'JJA': [3, 4, 5], + 'MAM': [6, 7, 8], + 'SON': [9, 10, 11] + } + + # Getting unique sites and sorting them + sorted_sites = sorted(obs_data['Site'].unique()) + + # Creating the PDF with no space between subplots + pdf_pages = PdfPages(pdf_path) + + # ================================================================== + # Loop over pages (4 sites per page) + # ================================================================== + for page_idx in range(1): # range(0, len(sorted_sites), 4): + fig, axs = plt.subplots( + 4, 4, + figsize=(15, 15), + sharex=True, + sharey=True + ) + + # Selecting the next four sites for this page + sites_for_page = sorted_sites[page_idx:page_idx+4] + + # ============================================================== + # Loop over sites + # ============================================================== + for site_idx, site in enumerate(sites_for_page, 1): + + # Get the lon & lat for each site + lat = obs_data[obs_data['Site'] == site]['lat'].iloc[0] + lon = obs_data[obs_data['Site'] == site]['lon'].iloc[0] + + # Get nearest model data to the observation as pd.DataFrame + nearest_ref = get_nearest_model_data_to_obs( + gc_ref_data, + lon, + lat, + gc_pressure, + gc_cs_grid=gc_cs_grid_ref + ) + nearest_dev = get_nearest_model_data_to_obs( + gc_dev_data, + lon, + lat, + gc_pressure, + gc_cs_grid=gc_cs_grid_dev + ) + + # Adding site names at the top of each column + axs[0, site_idx-1].set_title(f'{site} ({lat}°)', size=15) + + # ========================================================== + # Loop over seasons (DJF, JJA, MAM, SON) + # ========================================================== + for season_idx, (season, months) in enumerate(seasons.items(), 1): + + # Get the seasonal means of observations & model data + mean_obs, std_obs, mean_ref, mean_dev = get_seasonal_means( + obs_data[obs_data["Site"] == site], + nearest_ref, + nearest_dev, + months + ) + + # Create a plot for a single site (all seasons) + plot_one_site( + axes_subplot=axs[season_idx-1, site_idx-1], + season=season, + season_idx=season_idx, + site_idx=site_idx, + mean_obs=mean_obs, + std_obs=std_obs, + gc_ref_label=gc_ref_label, + mean_ref=mean_ref, + gc_dev_label=gc_dev_label, + mean_dev=mean_dev + ) + + # ================================================================ + # Global page adjustments + # ================================================================ + page_adjustments(fig) + pdf_pages.savefig() + plt.close() + + # ================================================================ + # Save the PDF + # ================================================================ + pdf_pages.close() + #print('The pdf has been closed') + + +def main(): + + # Define the base directories for the two models (customized) + gc_ref_label = '14.2.0-rc.2' + gc_dev_label = '14.3.0-rc.0' + model_root_dir = "/n/holyscratch01/jacob_lab/ryantosca/BM/1yr" + obs_root_dir = "/n/jacob_lab/Lab/obs_data_for_bmk/atom_obs" + + # figure saveout path +# pdf_file = '/n/holyscratch01/jacob_lab/ryantosca/BM/1yr/BenchmarkResults/GCC_version_comparison/ModelVsObs/ozonesondes.pdf' + pdf_file = '/n/holyscratch01/jacob_lab/ryantosca/BM/1yr/ozonesondes.pdf' + + # Get the model data + gc_ref_data, gc_dev_data = get_ref_and_dev_model_data( + f"{model_root_dir}/{gc_ref_label}/GCClassic/FullChem/OutputDir", + f"{model_root_dir}/{gc_dev_label}/GCClassic/FullChem/OutputDir" + ) + + # Get the observational data + obs_data = get_obs_ozone_data( + f"{obs_root_dir}/allozonesondes_2010-2019.csv" + ) + + # GEOS-Chem pressure levels, tiled for 12 months + gc_levels = get_geoschem_level_metadata() + gc_pressure = np.tile(gc_levels["Pressure (hPa)"].to_numpy(), 12) + + # Create plots + plot_the_data( + obs_data, + gc_ref_label, + gc_ref_data, + gc_dev_label, + gc_dev_data, + gc_pressure, + pdf_file, + ) + +if __name__ == '__main__': + main() From b90daaf64e1a99177251f69812fb2aeb2309398a Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 6 Feb 2024 18:43:09 -0500 Subject: [PATCH 08/14] Further updates to benchmark_models_vs_ozonesondes.py gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py - Now import additional variables & functions from gcpy.grid, so that we can eventually compute the p_mid coordinate from the surface pressure at the observation site. - Add optional "Collection" kwarg to get_ref_and_dev_model_data - Trimmed trailing whitespace - Updated comments Signed-off-by: Bob Yantosca --- .../benchmark_models_vs_ozonesondes.py | 64 +++++++++++-------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py b/gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py index aeb3714e..29468f6e 100644 --- a/gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py +++ b/gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py @@ -9,7 +9,8 @@ import xarray as xr from matplotlib.backends.backend_pdf import PdfPages import matplotlib.pyplot as plt -from gcpy.grid import get_nearest_model_data +from gcpy.grid import _GEOS_72L_AP, _GEOS_72L_BP, \ + get_nearest_model_data, vert_grid from gcpy.util import verify_variable_type from gcpy.benchmark.modules.benchmark_utils import get_geoschem_level_metadata @@ -27,7 +28,7 @@ def get_model_ozone_data(file_path): data (xr.DataArray) : Ozone data [ppbv] """ verify_variable_type(file_path, str) - + # Read data (if path has wildcard, will read multiple data files) data = xr.open_mfdataset(file_path) @@ -44,6 +45,7 @@ def get_model_ozone_data(file_path): def get_ref_and_dev_model_data( ref_dir, dev_dir, + collection="SpeciesConc", year="2019" ): """ @@ -56,7 +58,8 @@ def get_ref_and_dev_model_data( Keyword Args (optional): ------------------------ - year (str) : Benchmark year (default: 2019) + collection (str) : Diagnostic collection (default: SpeciesConc) + year (str) : Benchmark year (default: 2019) Returns: -------- @@ -64,13 +67,13 @@ def get_ref_and_dev_model_data( """ verify_variable_type(ref_dir, str) verify_variable_type(dev_dir, str) - + # Get the model data data_ref = get_model_ozone_data( - f"{ref_dir}/GEOSChem.SpeciesConc.{year}*.nc4" + f"{ref_dir}/GEOSChem.{collection}.{year}*.nc4" ) data_dev = get_model_ozone_data( - f"{dev_dir}/GEOSChem.SpeciesConc.{year}*.nc4" + f"{dev_dir}/GEOSChem.{collection}.{year}*.nc4" ) return data_ref, data_dev @@ -83,7 +86,7 @@ def get_obs_ozone_data(file_path): Args: ----- file_path (str) : Path to ozone sonde file - + Returns: -------- data (pd.DataFrame) : Ozonesonde observations at various sites (ppbv) @@ -105,7 +108,7 @@ def get_seasonal_means(obs, ref, dev, months): verify_variable_type(ref, pd.DataFrame) verify_variable_type(dev, pd.DataFrame) verify_variable_type(months, list) - + # Filter data for season seasonal_obs = obs[(obs['month'].isin(months))] seasonal_ref = ref[(ref["month"].isin(months))] @@ -132,12 +135,16 @@ def get_nearest_model_data_to_obs( inserts the GEOS-Chem pressure levels into the dataset. """ verify_variable_type(gc_data, (xr.Dataset, xr.DataArray)) - verify_variable_type(gc_pressure, np.ndarray) verify_variable_type(gc_cs_grid, (xr.Dataset, type(None))) - + # Nearest data to the observation (pd.DataFrame) data = get_nearest_model_data(gc_data, lon, lat, gc_cs_grid).reset_index() + # NOTE: This would more accurately represent the pressure coordinate. + # Need to get approx surface pressure at each site. + #pmid = vert_grid(_GEOS_72L_AP, _GEOS_72L_BP, p_sfc=X).p_mid() + #pmid = np.tile(pmid, 12) + # Add "Pressure" and "month" columns data.insert(4, "pressure", gc_pressure) data["month"] = data["time"].dt.month @@ -164,7 +171,7 @@ def plot_one_site( ): """ """ - + # Plotting observations with error bars axes_subplot.errorbar( mean_obs, @@ -196,7 +203,6 @@ def plot_one_site( label=gc_dev_label, ) - # Inverting y-axis and setting y-axis to start from 1000 hPa axes_subplot.invert_yaxis() axes_subplot.set_ylim(1000, 50) @@ -218,7 +224,7 @@ def plot_one_site( axes_subplot.set_xticklabels(xtick_labels) # Add a legend in the fourth column and first row - if season_idx == 1 and site_idx == 4: + if season_idx == 1 and site_idx == 4: axes_subplot.legend( loc='lower right', fontsize='large' @@ -233,12 +239,17 @@ def plot_one_site( transform=axes_subplot.transAxes, fontsize=20, verticalalignment='top', - ) + ) def page_adjustments(fig): """ + Adjusts the page settings after all the subplots have been made. + + Args: + ----- + fig (mpl.Figure) : Figure object """ - + # Eliminating space between subplots completely plt.subplots_adjust(wspace=0, hspace=0) @@ -264,7 +275,7 @@ def page_adjustments(fig): #plt.tight_layout() - + def plot_the_data( obs_data, gc_ref_label, @@ -276,7 +287,7 @@ def plot_the_data( gc_cs_grid_ref=None, gc_cs_grid_dev=None, ): - """ + """ Creates plots of model data vs. ozonesonde data Args: @@ -287,9 +298,9 @@ def plot_the_data( gc_dev_label (str ) : Label for Dev model data data_dev (xr.DataArray) : Dev model data gc_pressure (np.ndarray ) : GC pressures (hPa), tiled x 12 months - pdf_path (str ) : Path to PDF file that will be created + pdf_path (str ) : Path to PDF file that will be created """ - + # Define seasons seasons = { 'DJF': [12, 1, 2], @@ -307,7 +318,8 @@ def plot_the_data( # ================================================================== # Loop over pages (4 sites per page) # ================================================================== - for page_idx in range(1): # range(0, len(sorted_sites), 4): + for page_idx in range(1): + #for page_idx in range(0, len(sorted_sites), 4): fig, axs = plt.subplots( 4, 4, figsize=(15, 15), @@ -342,7 +354,7 @@ def plot_the_data( gc_pressure, gc_cs_grid=gc_cs_grid_dev ) - + # Adding site names at the top of each column axs[0, site_idx-1].set_title(f'{site} ({lat}°)', size=15) @@ -361,7 +373,7 @@ def plot_the_data( # Create a plot for a single site (all seasons) plot_one_site( - axes_subplot=axs[season_idx-1, site_idx-1], + axes_subplot=axs[season_idx-1, site_idx-1], season=season, season_idx=season_idx, site_idx=site_idx, @@ -379,7 +391,7 @@ def plot_the_data( page_adjustments(fig) pdf_pages.savefig() plt.close() - + # ================================================================ # Save the PDF # ================================================================ @@ -388,15 +400,17 @@ def plot_the_data( def main(): + """ + Main program (for testing) + """ # Define the base directories for the two models (customized) gc_ref_label = '14.2.0-rc.2' gc_dev_label = '14.3.0-rc.0' model_root_dir = "/n/holyscratch01/jacob_lab/ryantosca/BM/1yr" obs_root_dir = "/n/jacob_lab/Lab/obs_data_for_bmk/atom_obs" - + # figure saveout path -# pdf_file = '/n/holyscratch01/jacob_lab/ryantosca/BM/1yr/BenchmarkResults/GCC_version_comparison/ModelVsObs/ozonesondes.pdf' pdf_file = '/n/holyscratch01/jacob_lab/ryantosca/BM/1yr/ozonesondes.pdf' # Get the model data From fb12a887be7c970e93855b4e00ec2b6432a56446 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Wed, 7 Feb 2024 11:19:46 -0500 Subject: [PATCH 09/14] get_vert_grid now accepts the p_sfc argument gcpy/grid.py - In routine get_vert_grid.py: - Add kwarg p_sfc with default value 1013.25 hPa so that we can generate the vertical grid edges and centers with reference to a given surface pressure - Updated PyDoc header - Never-nested the if-block logic to remove elif, else statements CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 + gcpy/grid.py | 67 +++++++++++++++++++++++++++++----------------------- 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4da0d5cb..868cb856 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Python packages for RTD documenation builds from `docs/environment_files/environment.yml` - Script `benchmark_model_vs_obs.py` now uses grid inquiry functions from `grid.py` to return data nearest to a (lat,lon) location - Moved routine `get_geoschem_level_metadata` to `gcpy/benchmark/modules/benchmark_utils.py` +- Refactored `get_vert_grid.py` (in `gcpy/grid.py`) to accept the `p_sfc` argument; Also never-nested the if-block logic. ### Fixed - CS inquiry functions in `gcpy/cstools.py` now work properly for `xr.Dataset` and `xr.DataArray` objects diff --git a/gcpy/grid.py b/gcpy/grid.py index 06295a86..20e02696 100644 --- a/gcpy/grid.py +++ b/gcpy/grid.py @@ -248,49 +248,58 @@ def get_grid_extents(data, edges=True): return -180, 180, -90, 90 -def get_vert_grid(dataset, AP=[], BP=[]): +def get_vert_grid( + dataset, + AP=None, + BP=None, + p_sfc=1013.25): """ Determine vertical grid of input dataset Args: - dataset: xarray Dataset - A GEOS-Chem output dataset + ----- + dataset (xr.Dataset) : A GEOS-Chem output dataset Keyword Args (optional): - AP: list-like type - Hybrid grid parameter A in hPa - Default value: [] - BP: list-like type - Hybrid grid parameter B (unitless) - Default value: [] + ------------------------ + AP (list-like) : Hybrid grid parameter A (hPA) + BP (list-like) : Hybrid grid parameter B (unitless) + p_sfc (float) : Returns: - p_edge: numpy array - Edge pressure values for vertical grid - p_mid: numpy array - Midpoint pressure values for vertical grid - nlev: int - Number of levels in vertical grid + -------- + pedge (np.ndarray) : Edge pressure values for vertical grid + p_mid (np.ndarray) : Midpoint pressure values for vertical grid + nlev: (int ) : Number of levels in vertical grid """ + # 72L GEOS grid if dataset.sizes["lev"] in (72, 73): - return GEOS_72L_grid.p_edge(), GEOS_72L_grid.p_mid(), 72 - elif dataset.sizes["lev"] in (47, 48): - return GEOS_47L_grid.p_edge(), GEOS_47L_grid.p_mid(), 47 - elif AP == [] or BP == []: + grid = vert_grid(_GEOS_72L_AP, _GEOS_72L_BP, p_sfc) + return grid.p_edge(), grid.p_mid(), 72 + + # 47L GEOS grid + if dataset.sizes["lev"] in (47, 48): + grid = vert_grid(_GEOS_47L_AP, _GEOS_47L_BP, p_sfc) + return grid.p_edge(), grid.p_mid(), 47 + + # Grid without specified AP, BP + if AP == None or BP == None: if dataset.sizes["lev"] == 1: AP = [1, 1] BP = [1] - new_grid = vert_grid(AP, BP) - return new_grid.p_edge(), new_grid.p_mid(), np.size(AP) - else: - raise ValueError( - "Only 72/73 or 47/48 level vertical grids are automatically determined" + - "from input dataset by get_vert_grid(), please pass grid parameters AP and BP" + - "as keyword arguments") - else: - new_grid = vert_grid(AP, BP) - return new_grid.p_edge(), new_grid.p_mid(), np.size(AP) + grid = vert_grid(AP, BP, p_sfc) + return grid.p_edge(), grid.p_mid(), np.size(AP) + + raise ValueError( + "Only 72/73 or 47/48 level vertical grids are automatically\n" + + "determined from input dataset by get_vert_grid().\n" + + "please pass grid parameters AP and BP as keyword arguments" + ) + + # Grid with specified AP, BP + grid = vert_grid(AP, BP, p_sfc) + return grid.p_edge(), grid.p_mid(), np.size(AP) def get_ilev_coord( From 22d22e91d3423646aae34bf10e07e420241fa177 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Wed, 7 Feb 2024 16:54:46 -0500 Subject: [PATCH 10/14] Renamed benchmark_models_vs_ozonesondes.py -> benchmark_models_vs_sondes.py gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py - Moved to benchmark_models_vs_sondes.py gcpy/benchmark/modules/benchmark_models_vs_sondes.py - Moved from benchmark_models_vs_ozonesondes.py - Added make_benchmark_models_vs_sondes_plots routines - Now use the updated get_vert_grid from gcpy/grid.py - Now read a metadata file with the surface pressure at each of the observation stations, so that the model & data grid can start at the same surface pressure - Renamed variables for consistency CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 2 +- .../benchmark_models_vs_ozonesondes.py | 443 -------------- .../modules/benchmark_models_vs_sondes.py | 566 ++++++++++++++++++ gcpy/benchmark/modules/benchmark_utils.py | 18 +- 4 files changed, 579 insertions(+), 450 deletions(-) delete mode 100644 gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py create mode 100644 gcpy/benchmark/modules/benchmark_models_vs_sondes.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 868cb856..570df984 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Script `./release/changeVersionNumbers.sh`, used to update version numbers in various files before release - Mamba/Conda enviroment file `docs/environment_files/read_the_docs_environment.yml`, for building ReadTheDocs documentation - Environment files `docs/environment_files/gcpy_requirements.txt` and `docs/environment_files/read_the_docs_requirements.txt` -- New benchmark script `gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py` +- New benchmark script `gcpy/benchmark/modules/benchmark_models_vs_sondes.py` ### Changed - Bump pip from 23.2.1 to 23.3 (dependabot suggested this) diff --git a/gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py b/gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py deleted file mode 100644 index 29468f6e..00000000 --- a/gcpy/benchmark/modules/benchmark_models_vs_ozonesondes.py +++ /dev/null @@ -1,443 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -"""Plots ozone sonde data vs. GEOS-Chem data for 1-yr benchmarks""" - -import os -import numpy as np -import pandas as pd -import xarray as xr -from matplotlib.backends.backend_pdf import PdfPages -import matplotlib.pyplot as plt -from gcpy.grid import _GEOS_72L_AP, _GEOS_72L_BP, \ - get_nearest_model_data, vert_grid -from gcpy.util import verify_variable_type -from gcpy.benchmark.modules.benchmark_utils import get_geoschem_level_metadata - - -def get_model_ozone_data(file_path): - """ - Returns model ozone data as a Pandas Dataframe object - - Args: - ----- - file_path (str) : Path to the data files - - Returns: - -------- - data (xr.DataArray) : Ozone data [ppbv] - """ - verify_variable_type(file_path, str) - - # Read data (if path has wildcard, will read multiple data files) - data = xr.open_mfdataset(file_path) - - # Convert from v/v to ppb - data *= 1.0e9 - - # Only pull out the Ozone data - if "SpeciesConcVV_O3" in data.data_vars: - return data["SpeciesConcVV_O3"] - if "SpeciesConc_O3" in data.data_vars: - return data["SpeciesConc_O3"] - - -def get_ref_and_dev_model_data( - ref_dir, - dev_dir, - collection="SpeciesConc", - year="2019" -): - """ - Returns the GEOS-Chem model data for Ref & Dev versions - - Args: - ----- - ref_dir (str) : Path to the Ref model SpeciesConc files - dev_dir (str) : Path to the Dev model SpeciesConc files - - Keyword Args (optional): - ------------------------ - collection (str) : Diagnostic collection (default: SpeciesConc) - year (str) : Benchmark year (default: 2019) - - Returns: - -------- - data_ref, data_dev (xr.DataArray) : Ref & Dev Ozone data - """ - verify_variable_type(ref_dir, str) - verify_variable_type(dev_dir, str) - - # Get the model data - data_ref = get_model_ozone_data( - f"{ref_dir}/GEOSChem.{collection}.{year}*.nc4" - ) - data_dev = get_model_ozone_data( - f"{dev_dir}/GEOSChem.{collection}.{year}*.nc4" - ) - - return data_ref, data_dev - - -def get_obs_ozone_data(file_path): - """ - Returns the ozone sonde observations as a pandas DataFrame object. - - Args: - ----- - file_path (str) : Path to ozone sonde file - - Returns: - -------- - data (pd.DataFrame) : Ozonesonde observations at various sites (ppbv) - """ - verify_variable_type(file_path, str) - - data = pd.read_csv(file_path, delimiter=',', index_col=0) - data = data.rename(columns={"Latitude": "lat", "Longitude": "lon"}) - - return data - - -def get_seasonal_means(obs, ref, dev, months): - """ - Returns seasonally averaged data for the observations - and Ref and Dev models. Also returns the data - """ - verify_variable_type(obs, pd.DataFrame) - verify_variable_type(ref, pd.DataFrame) - verify_variable_type(dev, pd.DataFrame) - verify_variable_type(months, list) - - # Filter data for season - seasonal_obs = obs[(obs['month'].isin(months))] - seasonal_ref = ref[(ref["month"].isin(months))] - seasonal_dev = dev[(dev["month"].isin(months))] - - # Take the seasonal means - mean_obs = seasonal_obs.groupby('pressure')['o3_ppb'].mean() - std_obs = seasonal_obs.groupby('pressure')['o3_ppb_sd'].mean() - mean_ref = seasonal_ref.groupby('pressure')['o3_ppb'].mean() - mean_dev = seasonal_dev.groupby('pressure')['o3_ppb'].mean() - - return mean_obs, std_obs, mean_ref, mean_dev - - -def get_nearest_model_data_to_obs( - gc_data, - lon, - lat, - gc_pressure, - gc_cs_grid=None -): - """ - Returns the nearest GEOS-Chem data to an observation. Also - inserts the GEOS-Chem pressure levels into the dataset. - """ - verify_variable_type(gc_data, (xr.Dataset, xr.DataArray)) - verify_variable_type(gc_cs_grid, (xr.Dataset, type(None))) - - # Nearest data to the observation (pd.DataFrame) - data = get_nearest_model_data(gc_data, lon, lat, gc_cs_grid).reset_index() - - # NOTE: This would more accurately represent the pressure coordinate. - # Need to get approx surface pressure at each site. - #pmid = vert_grid(_GEOS_72L_AP, _GEOS_72L_BP, p_sfc=X).p_mid() - #pmid = np.tile(pmid, 12) - - # Add "Pressure" and "month" columns - data.insert(4, "pressure", gc_pressure) - data["month"] = data["time"].dt.month - - # Rename columns to be consistent w/ obs data - data = data.rename( - columns={"SpeciesConcVV_O3": "o3_ppb", "SpeciesConc_O3": "o3_ppb"} - ) - - return data - - -def plot_one_site( - axes_subplot, - season, - season_idx, - site_idx, - mean_obs, - std_obs, - gc_ref_label, - mean_ref, - gc_dev_label, - mean_dev -): - """ - """ - - # Plotting observations with error bars - axes_subplot.errorbar( - mean_obs, - mean_obs.index, - xerr=std_obs, - fmt='-o', - color='black', - markersize=3, - label='Observations' - ) - - # Plotting model data - axes_subplot.plot( - mean_ref, - mean_ref.index, - color='r', - marker='o', - markersize=3, - lw=1, - label=gc_ref_label - ) - axes_subplot.plot( - mean_dev, - mean_dev.index, - color='g', - marker='o', - markersize=3, - lw=1, - label=gc_dev_label, - ) - - # Inverting y-axis and setting y-axis to start from 1000 hPa - axes_subplot.invert_yaxis() - axes_subplot.set_ylim(1000, 50) - - # Setting x-axis scale to 0-160 ppb - axes_subplot.set_xlim(0, 160) - - # X/Y ticks - axes_subplot.tick_params(axis='both', labelsize=12) - - # Get current x-axis tick values - xticks = axes_subplot.get_xticks() - - # Exclude '0' from the x-axis tick labels - xtick_labels = [f'{tick:.0f}' if tick != 0 else '' for tick in xticks] - - # Set modified tick labels - axes_subplot.set_xticks(xticks) - axes_subplot.set_xticklabels(xtick_labels) - - # Add a legend in the fourth column and first row - if season_idx == 1 and site_idx == 4: - axes_subplot.legend( - loc='lower right', - fontsize='large' - ) - - # Adding latitude and season as text inside the plot - if site_idx == 1: - axes_subplot.text( - 0.05, - 0.8, - f'{season}', - transform=axes_subplot.transAxes, - fontsize=20, - verticalalignment='top', - ) - -def page_adjustments(fig): - """ - Adjusts the page settings after all the subplots have been made. - - Args: - ----- - fig (mpl.Figure) : Figure object - """ - - # Eliminating space between subplots completely - plt.subplots_adjust(wspace=0, hspace=0) - - # Adjust subplot layout - plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05) - - # Setting common labels - fig.text( - 0.5, - 0.01, - 'Ozone Concentration (ppb)', - ha='center', - fontsize=20 - ) - fig.text( - 0.0, - 0.5, - 'Pressure (hPa)', - va='center', - rotation='vertical', - fontsize=20 - ) - - #plt.tight_layout() - - -def plot_the_data( - obs_data, - gc_ref_label, - gc_ref_data, - gc_dev_label, - gc_dev_data, - gc_pressure, - pdf_path, - gc_cs_grid_ref=None, - gc_cs_grid_dev=None, -): - """ - Creates plots of model data vs. ozonesonde data - - Args: - ----- - obs_data (pd.DataFrame) : Observational data - gc_ref_label (str ) : Label for Ref model data - gc_ref_data (xr.DataArray) : Ref model data - gc_dev_label (str ) : Label for Dev model data - data_dev (xr.DataArray) : Dev model data - gc_pressure (np.ndarray ) : GC pressures (hPa), tiled x 12 months - pdf_path (str ) : Path to PDF file that will be created - """ - - # Define seasons - seasons = { - 'DJF': [12, 1, 2], - 'JJA': [3, 4, 5], - 'MAM': [6, 7, 8], - 'SON': [9, 10, 11] - } - - # Getting unique sites and sorting them - sorted_sites = sorted(obs_data['Site'].unique()) - - # Creating the PDF with no space between subplots - pdf_pages = PdfPages(pdf_path) - - # ================================================================== - # Loop over pages (4 sites per page) - # ================================================================== - for page_idx in range(1): - #for page_idx in range(0, len(sorted_sites), 4): - fig, axs = plt.subplots( - 4, 4, - figsize=(15, 15), - sharex=True, - sharey=True - ) - - # Selecting the next four sites for this page - sites_for_page = sorted_sites[page_idx:page_idx+4] - - # ============================================================== - # Loop over sites - # ============================================================== - for site_idx, site in enumerate(sites_for_page, 1): - - # Get the lon & lat for each site - lat = obs_data[obs_data['Site'] == site]['lat'].iloc[0] - lon = obs_data[obs_data['Site'] == site]['lon'].iloc[0] - - # Get nearest model data to the observation as pd.DataFrame - nearest_ref = get_nearest_model_data_to_obs( - gc_ref_data, - lon, - lat, - gc_pressure, - gc_cs_grid=gc_cs_grid_ref - ) - nearest_dev = get_nearest_model_data_to_obs( - gc_dev_data, - lon, - lat, - gc_pressure, - gc_cs_grid=gc_cs_grid_dev - ) - - # Adding site names at the top of each column - axs[0, site_idx-1].set_title(f'{site} ({lat}°)', size=15) - - # ========================================================== - # Loop over seasons (DJF, JJA, MAM, SON) - # ========================================================== - for season_idx, (season, months) in enumerate(seasons.items(), 1): - - # Get the seasonal means of observations & model data - mean_obs, std_obs, mean_ref, mean_dev = get_seasonal_means( - obs_data[obs_data["Site"] == site], - nearest_ref, - nearest_dev, - months - ) - - # Create a plot for a single site (all seasons) - plot_one_site( - axes_subplot=axs[season_idx-1, site_idx-1], - season=season, - season_idx=season_idx, - site_idx=site_idx, - mean_obs=mean_obs, - std_obs=std_obs, - gc_ref_label=gc_ref_label, - mean_ref=mean_ref, - gc_dev_label=gc_dev_label, - mean_dev=mean_dev - ) - - # ================================================================ - # Global page adjustments - # ================================================================ - page_adjustments(fig) - pdf_pages.savefig() - plt.close() - - # ================================================================ - # Save the PDF - # ================================================================ - pdf_pages.close() - #print('The pdf has been closed') - - -def main(): - """ - Main program (for testing) - """ - - # Define the base directories for the two models (customized) - gc_ref_label = '14.2.0-rc.2' - gc_dev_label = '14.3.0-rc.0' - model_root_dir = "/n/holyscratch01/jacob_lab/ryantosca/BM/1yr" - obs_root_dir = "/n/jacob_lab/Lab/obs_data_for_bmk/atom_obs" - - # figure saveout path - pdf_file = '/n/holyscratch01/jacob_lab/ryantosca/BM/1yr/ozonesondes.pdf' - - # Get the model data - gc_ref_data, gc_dev_data = get_ref_and_dev_model_data( - f"{model_root_dir}/{gc_ref_label}/GCClassic/FullChem/OutputDir", - f"{model_root_dir}/{gc_dev_label}/GCClassic/FullChem/OutputDir" - ) - - # Get the observational data - obs_data = get_obs_ozone_data( - f"{obs_root_dir}/allozonesondes_2010-2019.csv" - ) - - # GEOS-Chem pressure levels, tiled for 12 months - gc_levels = get_geoschem_level_metadata() - gc_pressure = np.tile(gc_levels["Pressure (hPa)"].to_numpy(), 12) - - # Create plots - plot_the_data( - obs_data, - gc_ref_label, - gc_ref_data, - gc_dev_label, - gc_dev_data, - gc_pressure, - pdf_file, - ) - -if __name__ == '__main__': - main() diff --git a/gcpy/benchmark/modules/benchmark_models_vs_sondes.py b/gcpy/benchmark/modules/benchmark_models_vs_sondes.py new file mode 100644 index 00000000..b22410bd --- /dev/null +++ b/gcpy/benchmark/modules/benchmark_models_vs_sondes.py @@ -0,0 +1,566 @@ +#!/usr/bin/env python +# coding: utf-8 + +"""Plots ozone sonde data vs. GEOS-Chem data for 1-yr benchmarks""" + +import os +import numpy as np +import pandas as pd +import xarray as xr +from matplotlib.backends.backend_pdf import PdfPages +import matplotlib.pyplot as plt +from gcpy.cstools import extract_grid +from gcpy.grid import get_nearest_model_data, get_vert_grid +from gcpy.util import make_directory, verify_variable_type +from gcpy.benchmark.modules.benchmark_utils import read_ref_and_dev + +# Tell matplotlib not to look for an X-window +os.environ["QT_QPA_PLATFORM"] = "offscreen" + + +def get_ref_and_dev_model_data( + ref_filepaths, + dev_filepaths, + varname="SpeciesConcVV_O3", +): + """ + Returns GEOS-Chem model ozone data as a Pandas Dataframe object + + Args + file_path : str : Path to the data files + + Returns + data : xr.DataArray : Ozone data [ppbv] + """ + verify_variable_type(ref_filepaths, (str,list)) + verify_variable_type(dev_filepaths, (str,list)) + + # Get the model data + ref_data, dev_data = read_ref_and_dev( + ref_filepaths, + dev_filepaths, + multi_file=True + ) + + # Return the species of interest and convert to ppbv + return ref_data[varname]*1.0e9, dev_data[varname]*1.0e9 + + +def get_obs_ozone_data(file_path): + """ + Returns the ozone sonde observations as a pandas DataFrame object. + + Args + file_path : str : Path to ozone sonde file + + Returns + data : pd.DataFrame : Sonde observations (ppbv) + """ + verify_variable_type(file_path, str) + + data = pd.read_csv(file_path, delimiter=',', index_col=0) + data = data.rename(columns={"Latitude": "lat", "Longitude": "lon"}) + + return data + + +def get_site_coords( + obs_data, + obs_site_metadata, + site_name, +): + """ + Returns the lon, lat, and surface pressure at each sonde site. + + Args + obs_data : pd.DataFrame : Observational data + obs_site_metadata : pd.DataFrame : Surface pressure at observation sites + site_name : str : Observation site name + + Returns + lat : float : Latitude at observation site + lon : float : Longitude at observation site + p_sfc : float : Surface pressure (hPa) at obs site + """ + search = obs_data['Site'] == site_name + lat = obs_data[search]['lat'].iloc[0] + lon = obs_data[search]['lon'].iloc[0] + + search = obs_site_metadata['Site'] == site_name + p_sfc = obs_site_metadata[search]["Surface Pressure (hPa)"].iloc[0] + + return lat, lon, p_sfc + + +def get_seasonal_means( + obs_data, + ref_data, + dev_data, + months, + varname="SpeciesConcVV_O3", +): + """ + Returns seasonally averaged data for the observations + and models. + + Args + obs_data : pd.DataFrame : O3 observations (ppbv) + ref_data : pd.DataFrame : O3 from Ref model (ppbv) + dev_data : pd.DataFrame : O3 from Dev model (ppbv) + months : list : Months in each season + + Keyword Args + varname : str : GEOS-Chem diagnostic variable name + + Returns + obs_seas_mean : pd.DataFrame : Seasonal mean O3 observations (ppbv) + std_dev : pd.DataFrame : Standard deviation in mean_obs (ppbv) + ref_seas_mean : pd.DataFrame ; Seasonal mean O3 from Ref (ppbv) + dev_seas_mean : pd.DataFrame : Seasonal mean O3 from Dev (ppbv) + """ + verify_variable_type(obs_data, pd.DataFrame) + verify_variable_type(ref_data, pd.DataFrame) + verify_variable_type(dev_data, pd.DataFrame) + verify_variable_type(months, list) + + # Filter data for season + obs_this_season = obs_data[(obs_data['month'].isin(months))] + ref_this_season = ref_data[(ref_data["month"].isin(months))] + dev_this_season = dev_data[(dev_data["month"].isin(months))] + + # Take the seasonal means + obs_seas_mean = obs_this_season.groupby('pressure')['o3_ppb'].mean() + std_dev = obs_this_season.groupby('pressure')['o3_ppb_sd'].mean() + ref_seas_mean = ref_this_season.groupby('pressure')[varname].mean() + dev_seas_mean = dev_this_season.groupby('pressure')[varname].mean() + + return obs_seas_mean, std_dev, ref_seas_mean, dev_seas_mean + + +def get_nearest_model_data_to_obs( + data, + lon, + lat, + p_sfc, + cs_grid=None, +): + """ + Returns the nearest GEOS-Chem data to an observation. Also + inserts the GEOS-Chem pressure levels into the dataset. + + Args + data : xr.DataArray : GEOS-Chem data + lon : float : Longitude at obs site + lat : float : Latitude at obs site + p_sfc : float : Sfc pressure at obs site + + Keyword Args + cs_grid : xr.Dataset|None : Cubed-sphere grid metadata + """ + verify_variable_type(data, xr.DataArray) + verify_variable_type(cs_grid, (xr.Dataset, type(None))) + + # Nearest data to the observation (pd.DataFrame) + nearest = get_nearest_model_data( + data, + lon, + lat, + cs_grid + ).reset_index() + + # Vertical pressure grid for GEOS-Chem. + _, p_mid, _ = get_vert_grid(data, p_sfc=p_sfc) + + # Repeat surface pressure for 12 months of data + p_mid = np.tile(p_mid, 12) + + # Add "Pressure" and "month" columns + nearest.insert(4, "pressure", p_mid) + nearest["month"] = nearest["time"].dt.month + + return nearest + + +def plot_one_site( + axes_subplot, + season, + season_idx, + site_idx, + obs_seas_mean, + std_dev, + ref_label, + ref_seas_mean, + dev_label, + dev_seas_mean, +): + """ + Plots ozonesonde vs. model data for all seasons at a single site + (i.e. one column of a page). + + Args + axes_supblot : mpl.Axes : The current subplot + season : str : Name of the season + season_idx : int : Index of the current season + site_idx : int : Index of the current site + obs_seas_mean : pd.Series : Seasonal mean O3 obs (ppbv) + std_dev : pd.Series : Std dev of mean_obs (ppbv) + ref_label : str : Label for the Ref version + ref_seas_mean : pd.Series ; Seasonal mean O3 from Ref (ppbv) + dev_label : str : Label for the Dev version + dev_seas_mean : pd.Series ; Seasonal mean O3 from Dev (ppbv) + """ + verify_variable_type(season, str) + verify_variable_type(season_idx, int) + verify_variable_type(site_idx, int) + verify_variable_type(obs_seas_mean, pd.Series) + verify_variable_type(std_dev, pd.Series) + verify_variable_type(ref_label, str) + verify_variable_type(ref_seas_mean, pd.Series) + verify_variable_type(dev_label, str) + verify_variable_type(dev_seas_mean, pd.Series) + + # Plotting observations with error bars + axes_subplot.errorbar( + obs_seas_mean, + obs_seas_mean.index, + xerr=std_dev, + fmt='-o', + color='black', + markersize=3, + label='Observations' + ) + + # Plotting model data + axes_subplot.plot( + ref_seas_mean, + ref_seas_mean.index, + color='r', + marker='o', + markersize=3, + lw=1, + label=ref_label + ) + axes_subplot.plot( + dev_seas_mean, + dev_seas_mean.index, + color='g', + marker='o', + markersize=3, + lw=1, + label=dev_label, + ) + + # Inverting y-axis and setting y-axis to start from 1000 hPa + axes_subplot.invert_yaxis() + axes_subplot.set_ylim(1000, 50) + + # Setting x-axis scale to 0-160 ppb + axes_subplot.set_xlim(0, 160) + + # X/Y ticks + axes_subplot.tick_params(axis='both', labelsize=12) + + # Get current x-axis tick values + xticks = axes_subplot.get_xticks() + + # Exclude '0' from the x-axis tick labels + xtick_labels = [f'{tick:.0f}' if tick != 0 else '' for tick in xticks] + + # Set modified tick labels + axes_subplot.set_xticks(xticks) + axes_subplot.set_xticklabels(xtick_labels) + + # Add a legend in the fourth column and first row + if season_idx == 1 and site_idx == 4: + axes_subplot.legend( + loc='lower right', + fontsize='large' + ) + + # Adding latitude and season as text inside the plot + if site_idx == 1: + axes_subplot.text( + 0.95, + 0.2, + f'{season}', + transform=axes_subplot.transAxes, + fontsize=20, + verticalalignment='top', + horizontalalignment="right", + ) + + +def page_adjustments(fig): + """ + Adjusts the page settings after all the subplots have been made. + + Args: + fig : mpl.Figure : Figure object + """ + + # Eliminating space between subplots completely + plt.subplots_adjust(wspace=0, hspace=0) + + # Adjust subplot layout + plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05) + + # Setting common labels + fig.text( + 0.5, + 0.01, + 'Ozone Concentration (ppb)', + ha='center', + fontsize=20 + ) + fig.text( + 0.0, + 0.5, + 'Pressure (hPa)', + va='center', + rotation='vertical', + fontsize=20 + ) + + +def plot_the_data( + obs_data, + obs_site_metadata, + ref_label, + ref_data, + dev_label, + dev_data, + dst="./benchmark", + varname="SpeciesConcVV_O3", +): + """ + Creates plots of model data vs. ozonesonde data + + Args + obs_data : pd.DataFrame : Observational data + obs_site_metadata : pd.DataFrame : Sfs pressure at observation sites + ref_label : str : Label for Ref model data + ref_data : xr.DataArray : Ref model data + dev_label : str : Label for Dev model data + data_dev : xr.DataArray : Dev model data + pdf_path : str : Path to PDF output file + + Keyword Args + dst : str : Destination folder + varname : str : Name of variable to plot + """ + # ================================================================== + # Initialization + # ================================================================== + + # Get cubed-sphere metadata (will be None for non-CS grids) + # This is needed to find the nearest grid box to the observations + ref_cs_grid = extract_grid(ref_data) + dev_cs_grid = extract_grid(dev_data) + + # Define seasons + seasons = { + 'DJF': [12, 1, 2], + 'JJA': [3, 4, 5], + 'MAM': [6, 7, 8], + 'SON': [9, 10, 11] + } + + # Unique site names (needed for loops below) + sorted_sites = sorted(obs_data['Site'].unique()) + + # Creating the PDF with no space between subplots + # Open the plot as a PDF document + pdf_file = f"{dst}/models_vs_sondes.{varname.split('_')[1]}.pdf" + pdf_pages = PdfPages(pdf_file) + + # ================================================================== + # Loop over pages (4 sites per page) + # ================================================================== + for page_idx in range(0, len(sorted_sites), 4): + fig, axes = plt.subplots( + 4, 4, + figsize=(15, 15), + sharex=True, + sharey=True + ) + + # Selecting the next four sites for this page + sites_for_page = sorted_sites[page_idx:page_idx+4] + + # ============================================================== + # Loop over sites + # ============================================================== + for site_idx, site in enumerate(sites_for_page, 1): + + # Get coordinates of observation site + lat, lon, p_sfc = get_site_coords( + obs_data, + obs_site_metadata, + site, + ) + + # Get nearest model data to the observation as pd.DataFrame + nearest_ref = get_nearest_model_data_to_obs( + ref_data, + lon, + lat, + p_sfc, + cs_grid=ref_cs_grid, + ) + nearest_dev = get_nearest_model_data_to_obs( + dev_data, + lon, + lat, + p_sfc, + cs_grid=dev_cs_grid, + ) + + # Adding site names at the top of each column + axes[0, site_idx-1].set_title(f'{site} ({lat}°)', size=15) + + # ========================================================== + # Loop over seasons (DJF, JJA, MAM, SON) + # ========================================================== + for season_idx, (season, months) in enumerate(seasons.items(), 1): + + # Get the seasonal means of observations & model data + obs_seas_mean, std_dev, \ + ref_seas_mean, dev_seas_mean = get_seasonal_means( + obs_data[obs_data["Site"] == site], + nearest_ref, + nearest_dev, + months + ) + + # Create a plot for a single site (all seasons) + axes_subplot = axes[season_idx-1, site_idx-1] + plot_one_site( + axes_subplot, + season, + season_idx, + site_idx, + obs_seas_mean, + std_dev, + ref_label, + ref_seas_mean, + dev_label, + dev_seas_mean, + ) + + # ================================================================ + # Global page adjustments + # ================================================================ + page_adjustments(fig) + pdf_pages.savefig() + plt.close() + + # ================================================================ + # Save the PDF + # ================================================================ + pdf_pages.close() + + +def make_benchmark_models_vs_sondes_plots( + obs_data_file, + obs_site_file, + ref_filepaths, + ref_label, + dev_filepaths, + dev_label, + dst="./benchmark", + overwrite=False, + varname="SpeciesConcVV_O3", + + ): + """ + Creates plots of sonde data vs. GEOS-Chem output. For use in the + 1-year benchmark plotting workflow. + + Args: + obs_data_file : str : File containing sonde data + obs_site_file : str : File containing sonde site metadata + ref_filepaths : str|list : Files for the GEOS-Chem Ref version + ref_label : str : GEOS-Chem Ref version label + dev_filepaths : str|list : Files for the GEOS-Chem Dev version + dev_label : str : GEOS-Chem Dev version label + + Keyword Args + dst : str : Folder where PDF w/ plots will be created + overwrite : bool : Overwrite contents of dst folder? + varname : str : GEOS-Chem diagnostic variable name + verbose : bool : Activate verbose printout? + + """ + verify_variable_type(obs_data_file, str) + verify_variable_type(obs_site_file, str) + verify_variable_type(ref_filepaths, (str, list)) + verify_variable_type(ref_label, str) + verify_variable_type(dev_filepaths, (str, list)) + verify_variable_type(dev_label, str) + + # Create the destination folder + make_directory( + dst, + overwrite=overwrite + ) + + # Get the observational data and site metadata + obs_data = get_obs_ozone_data(obs_data_file) + obs_site_metadata = pd.read_csv(obs_site_file) + + # Get the model data from Ref and Dev versions + ref_data, dev_data = get_ref_and_dev_model_data( + ref_filepaths, + dev_filepaths, + varname=varname, + ) + + # Create plots + plot_the_data( + obs_data, + obs_site_metadata, + ref_label, + ref_data, + dev_label, + dev_data, + dst, + ) + + +def main(): + """ + Main program (for testing) + """ + # Define the base directories for the two models (customized) + ref_label = '14.2.0-rc.2' + dev_label = '14.3.0-rc.0' + + # Observation file paths + obs_root_dir = "/n/jacob_lab/Lab/obs_data_for_bmk/atom_obs/" + obs_data_file = f"{obs_root_dir}/allozonesondes_2010-2019.csv" + obs_site_file = f"{obs_root_dir}/allozonesondes_elevation.csv" + + # Model file paths + model_root_dir = "/n/holyscratch01/jacob_lab/ryantosca/BM/1yr" + suffix = "GCClassic/FullChem/OutputDir/GEOSChem.SpeciesConc*2019*.nc4" + ref_filepaths = f"{model_root_dir}/{ref_label}/{suffix}" + dev_filepaths = f"{model_root_dir}/{dev_label}/{suffix}" + + # PDF output folder + dst = "/n/holyscratch01/jacob_lab/ryantosca/BM/1yr/BenchmarkResults/GCC_version_comparison/ModelVsObs" + + # Create the plots + make_benchmark_models_vs_sondes_plots( + obs_data_file, + obs_site_file, + ref_filepaths, + ref_label, + dev_filepaths, + dev_label, + dst=dst, + overwrite=True, + ) + + +if __name__ == '__main__': + main() diff --git a/gcpy/benchmark/modules/benchmark_utils.py b/gcpy/benchmark/modules/benchmark_utils.py index 27778bd4..4f8a1aad 100644 --- a/gcpy/benchmark/modules/benchmark_utils.py +++ b/gcpy/benchmark/modules/benchmark_utils.py @@ -57,6 +57,7 @@ def read_ref_and_dev( ref, dev, time_mean=False, + multi_file=False, verbose=False ): """ @@ -64,14 +65,19 @@ def read_ref_and_dev( Args: ----- - ref (str|list ) : Ref data file(s) - def (str|list ) : Dev data file(s) - time_mean (bool ) : Return the average over the time dimension? + ref (str|list) : Ref data file(s) + def (str|list) : Dev data file(s) + + Keyword Args (optional) + ----------------------- + multi_file (bool) : Read multiple files w/o taking avg over time + time_mean (bool) : Return the average over the time dimension? + verbose (bool) : Enable verbose output Returns: -------- - ref_data (xr.Dataset) : Data from the Ref model - dev_data (xr.Dataset) ls: Data from the Dev model + ref_data (xr.Dataset) : Data from the Ref model + dev_data (xr.Dataset) : Data from the Dev model """ util.verify_variable_type(ref, (str, list)) util.verify_variable_type(dev, (str, list)) @@ -79,7 +85,7 @@ def (str|list ) : Dev data file(s) ref_data = None dev_data = None - reader = util.dataset_reader(time_mean, verbose=verbose) + reader = util.dataset_reader(time_mean|multi_file, verbose=verbose) if ref is not None: ref_data = reader(ref, drop_variables=skip_these_vars) From 3348b29d82d160be843480056e2408661b1a6149 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 8 Feb 2024 11:59:59 -0500 Subject: [PATCH 11/14] Add models vs. sondes plots to run_1yr_fullchem_benchmark.py script gcpy/benchmark/benchmark_slurm.sh - Now save output to a log file with the same base name as the YAML configuration file gcpy/benchmark/modules/benchmark_models_vs_obs.py - Renamed to benchmark_models_vs_ebas.py gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py - Renamed from benchmark_models_vs_obs.py - Also renamed driver routine to make_benchmark_vs_ebas_obs_plots gcpy/benchmark/modules/benchmark_models_vs_sondes.py - Removed the main function (this is for debugging only) gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py - Now call make_benchmark_1yr_models_vs_ebas_o3 routine - Now call make_benchmark_models_vs_sondes_plots routine gcpy/benchmark/config/1yr_fullchem_benchmark.yml - Expanded obs_data section to include data directories & file names for EBAS and sonde observations - Removed extraneous comments CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 3 + gcpy/benchmark/benchmark_slurm.sh | 2 +- .../config/1yr_fullchem_benchmark.yml | 13 ++- ..._obs.py => benchmark_models_vs_ebas_o3.py} | 4 +- .../modules/benchmark_models_vs_sondes.py | 39 --------- .../modules/run_1yr_fullchem_benchmark.py | 82 +++++++++++++++---- 6 files changed, 81 insertions(+), 62 deletions(-) rename gcpy/benchmark/modules/{benchmark_models_vs_obs.py => benchmark_models_vs_ebas_o3.py} (99%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 570df984..83a40f6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Script `benchmark_model_vs_obs.py` now uses grid inquiry functions from `grid.py` to return data nearest to a (lat,lon) location - Moved routine `get_geoschem_level_metadata` to `gcpy/benchmark/modules/benchmark_utils.py` - Refactored `get_vert_grid.py` (in `gcpy/grid.py`) to accept the `p_sfc` argument; Also never-nested the if-block logic. +- Renamed `gcpy/benchmark/modules/benchmark_models_vs_obs.py` to `benchmark_models_vs_ebas_o3.py` +- `benchmark_slurm.sh` script now saves output to a log file with the + same base name as the YAML config file ### Fixed - CS inquiry functions in `gcpy/cstools.py` now work properly for `xr.Dataset` and `xr.DataArray` objects diff --git a/gcpy/benchmark/benchmark_slurm.sh b/gcpy/benchmark/benchmark_slurm.sh index 7b8992d0..e75f617a 100755 --- a/gcpy/benchmark/benchmark_slurm.sh +++ b/gcpy/benchmark/benchmark_slurm.sh @@ -34,7 +34,7 @@ config="1yr_fullchem_benchmark.yml" #config="1yr_tt_benchmark.yml" # Call the run_benchmark script to make the plots -python -m gcpy.benchmark.run_benchmark "${config}" > benchmark.log 2>&1 +python -m gcpy.benchmark.run_benchmark "${config}" > "${config/.yml/.log}: 2>&1 # Turn off python environment mamba deactivate diff --git a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml index 7455af1d..b106bd4d 100644 --- a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml +++ b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml @@ -24,14 +24,23 @@ # spcdb_dir: Folder in which the species_database.yml file is # located. If set to "default", then will look for # species_database.yml in one of the Dev rundirs. -# obs_data_dir: Path to observational data (for models vs obs plots) +# obs_data: Paths to observations (for models vs. obs plots) # paths: main_dir: /path/to/benchmark/main/dir results_dir: /path/to/BenchmarkResults weights_dir: /n/holyscratch01/external_repos/GEOS-CHEM/gcgrid/data/ExtData/GCHP/RegriddingWeights spcdb_dir: default - obs_data_dir: /path/to/observational/data + # + # Observational data dirs are on Harvard Cannon, edit if necessary + # + obs_data: + ebas_o3: + data_dir: /n/jacob_lab/Lab/obs_data_for_bmk/ebas_o3/ebas_sfc_o3_2019 + sondes: + data_dir: /n/jacob_lab/Lab/obs_data_for_bmk/sondes_2010-2019 + data_file: allozonesondes_2010-2019.csv + site_file: allozonesondes_site_elev.csv # # data: Contains configurations for ref and dev runs # version: Version string (must not contain spaces) diff --git a/gcpy/benchmark/modules/benchmark_models_vs_obs.py b/gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py similarity index 99% rename from gcpy/benchmark/modules/benchmark_models_vs_obs.py rename to gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py index d7969c72..d8365478 100644 --- a/gcpy/benchmark/modules/benchmark_models_vs_obs.py +++ b/gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py @@ -12,7 +12,6 @@ Linted with PyLint and incorporated into GCPy by Bob Yantosca """ -import os import glob from datetime import datetime, timedelta from matplotlib.backends.backend_pdf import PdfPages @@ -633,7 +632,6 @@ def plot_one_page( rows_per_page=3, cols_per_page=3, varname="SpeciesConcVV_O3", - **kwargs ): """ Plots a single page of models vs. observations. @@ -841,7 +839,7 @@ def plot_models_vs_obs( pdf.close() -def make_benchmark_models_vs_obs_plots( +def make_benchmark_models_vs_ebas_o3_plots( obs_filepaths, ref_filepaths, ref_label, diff --git a/gcpy/benchmark/modules/benchmark_models_vs_sondes.py b/gcpy/benchmark/modules/benchmark_models_vs_sondes.py index b22410bd..4f566cca 100644 --- a/gcpy/benchmark/modules/benchmark_models_vs_sondes.py +++ b/gcpy/benchmark/modules/benchmark_models_vs_sondes.py @@ -525,42 +525,3 @@ def make_benchmark_models_vs_sondes_plots( dev_data, dst, ) - - -def main(): - """ - Main program (for testing) - """ - # Define the base directories for the two models (customized) - ref_label = '14.2.0-rc.2' - dev_label = '14.3.0-rc.0' - - # Observation file paths - obs_root_dir = "/n/jacob_lab/Lab/obs_data_for_bmk/atom_obs/" - obs_data_file = f"{obs_root_dir}/allozonesondes_2010-2019.csv" - obs_site_file = f"{obs_root_dir}/allozonesondes_elevation.csv" - - # Model file paths - model_root_dir = "/n/holyscratch01/jacob_lab/ryantosca/BM/1yr" - suffix = "GCClassic/FullChem/OutputDir/GEOSChem.SpeciesConc*2019*.nc4" - ref_filepaths = f"{model_root_dir}/{ref_label}/{suffix}" - dev_filepaths = f"{model_root_dir}/{dev_label}/{suffix}" - - # PDF output folder - dst = "/n/holyscratch01/jacob_lab/ryantosca/BM/1yr/BenchmarkResults/GCC_version_comparison/ModelVsObs" - - # Create the plots - make_benchmark_models_vs_sondes_plots( - obs_data_file, - obs_site_file, - ref_filepaths, - ref_label, - dev_filepaths, - dev_label, - dst=dst, - overwrite=True, - ) - - -if __name__ == '__main__': - main() diff --git a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py index 9c18a750..7b3d033e 100644 --- a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py +++ b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py @@ -62,8 +62,11 @@ import gcpy.oh_metrics as oh import gcpy.budget_ox as ox from gcpy import benchmark_funcs as bmk -import gcpy.benchmark.modules.benchmark_models_vs_obs as mvo import gcpy.benchmark.modules.benchmark_utils as bmk_util +from gcpy.benchmark.modules.benchmark_models_vs_ebas_o3 \ + import make_benchmark_models_vs_ebas_o3_plots +from gcpy.benchmark.modules.benchmark_models_vs_sondes \ + import make_benchmark_models_vs_sondes_plots #TODO: Peel out routines from benchmark_funcs.py into smaller # routines in the gcpy/benchmark/modules folder, such as these: from gcpy.benchmark.modules.benchmark_drydep \ @@ -255,6 +258,18 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): diff_of_diffs_refstr = bmk.diff_of_diffs_toprow_title(config, "gcc") diff_of_diffs_devstr = bmk.diff_of_diffs_toprow_title(config, "gchp") + # ====================================================================== + # Observational data files + # ====================================================================== + sondes_data_file = os.path.join( + config["paths"]["sondes"]["data_dir"], + config["paths"]["sondes"]["data_file"], + ) + sondes_site_file = os.path.join( + config["paths"]["sondes"]["data_dir"], + config["paths"]["sondes"]["site_file"], + ) + ######################################################################## ### THE REST OF THESE SETTINGS SHOULD NOT NEED TO BE CHANGED ### ######################################################################## @@ -310,10 +325,6 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): bmk_mons_gchp_dev = all_months_gchp_dev[bmk_mon_inds] bmk_sec_per_month_dev = sec_per_month_dev[bmk_mon_inds] - # List of species for dry deposition velocity plots - - # Get common variables between Ref - # ====================================================================== # Print the list of plots & tables being generated # ====================================================================== @@ -902,9 +913,9 @@ def gcc_vs_gcc_ops_budg(mon): all_months_dev )[0] - # Plot models vs. observations (O3 for now) - mvo.make_benchmark_models_vs_obs_plots( - config["paths"]["obs_data_dir"], + # Plot models vs. EBAS O3 observations + make_benchmark_models_vs_ebas_o3_plots( + config["paths"]["obs_data"]["ebas_o3"]["data_dir"], ref, config["data"]["ref"]["gcc"]["version"], dev, @@ -914,6 +925,18 @@ def gcc_vs_gcc_ops_budg(mon): verbose=False ) + # Plot models vs. sondes + make_benchmark_models_vs_sondes_plots( + sondes_data_file, + sondes_site_file, + ref, + config["data"]["ref"]["gcc"]["version"], + dev, + config["data"]["dev"]["gcc"]["version"], + dst=gcc_vs_gcc_models_vs_obs_dir, + overwrite=True, + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC benchmark plots and tables @@ -1271,7 +1294,7 @@ def gcc_vs_gcc_ops_budg(mon): overwrite=True, spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], - varlist=drydepvel_species() + varlist=drydepvel_species() ) # -------------------------------------------------------------- @@ -1539,9 +1562,9 @@ def gchp_vs_gcc_ops_budg(mon): is_gchp=True )[0] - # Plot models vs. observations (O3 for now) - mvo.make_benchmark_models_vs_obs_plots( - config["paths"]["obs_data_dir"], + # Plot models vs. EBAS O3 observations + make_benchmark_models_vs_ebas_o3_plots( + config["paths"]["obs_data"]["ebas_o3"]["data_dir"], ref, config["data"]["dev"]["gcc"]["version"], dev, @@ -1551,6 +1574,19 @@ def gchp_vs_gcc_ops_budg(mon): verbose=False ) + # Plot models vs. sondes + make_benchmark_models_vs_sondes_plots( + sondes_data_file, + sondes_site_file, + ref, + config["data"]["dev"]["gcc"]["version"], + dev, + config["data"]["dev"]["gcc"]["version"], + dst=gchp_vs_gcc_models_vs_obs_dir, + overwrite=True, + ) + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCHP benchmark plots and tables # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1929,7 +1965,7 @@ def gchp_vs_gcc_ops_budg(mon): overwrite=True, spcdb_dir=spcdb_dir, n_job=config["options"]["n_cores"], - varlist=drydepvel_species() + varlist=drydepvel_species() ) # -------------------------------------------------------------- @@ -2207,11 +2243,11 @@ def gchp_vs_gchp_ops_budg(mon): is_gchp=True )[0] - # Plot models vs. observations (O3 for now) - mvo.make_benchmark_models_vs_obs_plots( - config["paths"]["obs_data_dir"], + # Plot models vs. EBAS O3 observations + make_benchmark_models_vs_ebas_o3_plots( + config["paths"]["obs_data"]["ebas_o3"]["data_dir"], ref, - config["data"]["ref"]["gchp"]["version"], + config["data"]["ref"]["version"], dev, config["data"]["dev"]["gchp"]["version"], dst=gchp_vs_gchp_models_vs_obs_dir, @@ -2219,6 +2255,18 @@ def gchp_vs_gchp_ops_budg(mon): verbose=False ) + # Plot models vs. sondes + make_benchmark_models_vs_sondes_plots( + sondes_data_file, + sondes_site_file, + ref, + config["data"]["dev"]["gcc"]["version"], + dev, + config["data"]["dev"]["gcc"]["version"], + dst=gchp_vs_gchp_models_vs_obs_dir, + overwrite=True, + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC difference of differences benchmark plots # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From b9587b08d1beb8671e71fa2d386db9698a53a695 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 8 Feb 2024 14:03:01 -0500 Subject: [PATCH 12/14] Fix additional bugs in models vs. obs plotting code & config files gcpy/benchmark/config/1yr_fullchem_benchmark.yml - Fix error in ebas_o3:data_dir YAML tag gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py - Remove **kwargs from plot_models_vs_obs function call - Restore the matplotlib plot style to "default" after plotting finishes, otherwise the plot style will be applied to other plots that follow gcpy/benchmark/models_vs_sondes.py - Add function sort_sites_by_lat to return a unique list of site names sorted by latitude from N to S gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py - Fix error in sonde data file paths (needed to add ["obs_data"]) - In GCHP vs GCC model vs sondes plots, make sure to use config["data"]["dev"]["gchp"]["version"] as the dev label - In GCHP vs GCHP model vs EBAS O3 plots, make sure to use config["data"]["ref"]["gchp"]["version"] as the ref label - In GCHP vs GCHP model vs sondes plots, make sure to use config["data"]["ref"]["gchp"]["version"] and config["data"]["dev"]["gchp"]["version"] as ref & dev labels Signed-off-by: Bob Yantosca --- .../config/1yr_fullchem_benchmark.yml | 2 +- .../modules/benchmark_models_vs_ebas_o3.py | 5 ++++- .../modules/benchmark_models_vs_sondes.py | 22 +++++++++++++++++-- .../modules/run_1yr_fullchem_benchmark.py | 17 +++++++------- 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml index b106bd4d..3238193a 100644 --- a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml +++ b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml @@ -36,7 +36,7 @@ paths: # obs_data: ebas_o3: - data_dir: /n/jacob_lab/Lab/obs_data_for_bmk/ebas_o3/ebas_sfc_o3_2019 + data_dir: /n/jacob_lab/Lab/obs_data_for_bmk/ebas_sfc_o3_2019 sondes: data_dir: /n/jacob_lab/Lab/obs_data_for_bmk/sondes_2010-2019 data_file: allozonesondes_2010-2019.csv diff --git a/gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py b/gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py index d8365478..69f39507 100644 --- a/gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py +++ b/gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py @@ -832,12 +832,15 @@ def plot_models_vs_obs( rows_per_page=rows_per_page, # int cols_per_page=cols_per_page, # int varname=varname, # str - **kwargs ) # Close the PDF file after all pages are plotted. pdf.close() + # Reset the plot style (this prevents the seaborn style from + # being applied to other model vs. obs plotting scripts) + plt.style.use("default") + def make_benchmark_models_vs_ebas_o3_plots( obs_filepaths, diff --git a/gcpy/benchmark/modules/benchmark_models_vs_sondes.py b/gcpy/benchmark/modules/benchmark_models_vs_sondes.py index 4f566cca..f0159a9f 100644 --- a/gcpy/benchmark/modules/benchmark_models_vs_sondes.py +++ b/gcpy/benchmark/modules/benchmark_models_vs_sondes.py @@ -322,6 +322,23 @@ def page_adjustments(fig): ) +def sort_sites_by_lat( + obs_data +): + """ + Returns a list of sonde sites sorted from N to S in latitude. + + Args + obs_data : pd.DataFrame : Observations from sondes (all sites) + + Returns + site_names : list : Sorted list of site names N to S + """ + sites = obs_data[['Site', "lat"]].sort_values(by=["lat"], ascending=False) + + return sites["Site"].unique() + + def plot_the_data( obs_data, obs_site_metadata, @@ -365,8 +382,9 @@ def plot_the_data( 'SON': [9, 10, 11] } - # Unique site names (needed for loops below) - sorted_sites = sorted(obs_data['Site'].unique()) + + # List of site names from N to S latitude + sorted_sites = sort_sites_by_lat(obs_data) # Creating the PDF with no space between subplots # Open the plot as a PDF document diff --git a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py index 7b3d033e..ccaa1fd7 100644 --- a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py +++ b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py @@ -262,12 +262,12 @@ def run_benchmark(config, bmk_year_ref, bmk_year_dev): # Observational data files # ====================================================================== sondes_data_file = os.path.join( - config["paths"]["sondes"]["data_dir"], - config["paths"]["sondes"]["data_file"], + config["paths"]["obs_data"]["sondes"]["data_dir"], + config["paths"]["obs_data"]["sondes"]["data_file"], ) sondes_site_file = os.path.join( - config["paths"]["sondes"]["data_dir"], - config["paths"]["sondes"]["site_file"], + config["paths"]["obs_data"]["sondes"]["data_dir"], + config["paths"]["obs_data"]["sondes"]["site_file"], ) ######################################################################## @@ -937,7 +937,6 @@ def gcc_vs_gcc_ops_budg(mon): overwrite=True, ) - # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC benchmark plots and tables # @@ -1581,7 +1580,7 @@ def gchp_vs_gcc_ops_budg(mon): ref, config["data"]["dev"]["gcc"]["version"], dev, - config["data"]["dev"]["gcc"]["version"], + config["data"]["dev"]["gchp"]["version"], dst=gchp_vs_gcc_models_vs_obs_dir, overwrite=True, ) @@ -2247,7 +2246,7 @@ def gchp_vs_gchp_ops_budg(mon): make_benchmark_models_vs_ebas_o3_plots( config["paths"]["obs_data"]["ebas_o3"]["data_dir"], ref, - config["data"]["ref"]["version"], + config["data"]["ref"]["gchp"]["version"], dev, config["data"]["dev"]["gchp"]["version"], dst=gchp_vs_gchp_models_vs_obs_dir, @@ -2260,9 +2259,9 @@ def gchp_vs_gchp_ops_budg(mon): sondes_data_file, sondes_site_file, ref, - config["data"]["dev"]["gcc"]["version"], + config["data"]["ref"]["gchp"]["version"], dev, - config["data"]["dev"]["gcc"]["version"], + config["data"]["dev"]["gchp"]["version"], dst=gchp_vs_gchp_models_vs_obs_dir, overwrite=True, ) From 8dd83d52ae68221d6e1d19a0b15a9dc26693bddf Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 8 Feb 2024 14:41:46 -0500 Subject: [PATCH 13/14] Removed extraneous text in CHANGELOG.md CHANGELOG.md - Removed ">>>>>>> dev", this was left over from a git merge Signed-off-by: Bob Yantosca --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83a40f6f..da2148b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,7 +66,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [1.4.1] - 2023-12-08 ### Fixed - Now use the proper default value for the `--weightsdir` argument to `gcpy/file_regrid.py` ->>>>>>> dev ## [1.4.0] - 2023-11-20 ### Added From 567342f096b9962575a8ef581b45bd7a7b3d418a Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 8 Feb 2024 17:22:14 -0500 Subject: [PATCH 14/14] Remove references to EBAS O3 to make plotting script more general gcpy/benchmark/config/1yr_fullchem_benchmark.yml - Add obs_data.ebas_o3.data_label YAML tag. This specifies the top-of-page plot title in the model vs. obs plots. gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py - Renamed to benchmark_models_vs_obs.py gcpy/benchmark/modules/benchmark_models_vs_obs.py - Renamed from benchmark_models_vs_ebas_o3.py, in order to make this script more generally applicable. - The main driver routine is now make_benchmark_models_vs_obs_plots - Now pass obs_data_label to relevant routines for the top-of-page plot titles - Update Pydoc header style to be less verbose gcpy/benchmark/modules/benchmark_models_vs_sondes.py - Updated Pydoc comments for consistency gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py - Now call make_benchmark_models_vs_obs_plots - Pass the config["obs_data"]["ebas_o3"]["data_label"] to make_benchmark_models_vs_obs_plots - Remove references to EBAS O3 in comments CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 5 +- .../config/1yr_fullchem_benchmark.yml | 1 + ..._ebas_o3.py => benchmark_models_vs_obs.py} | 311 ++++++------------ .../modules/benchmark_models_vs_sondes.py | 4 +- .../modules/run_1yr_fullchem_benchmark.py | 19 +- 5 files changed, 119 insertions(+), 221 deletions(-) rename gcpy/benchmark/modules/{benchmark_models_vs_ebas_o3.py => benchmark_models_vs_obs.py} (73%) diff --git a/CHANGELOG.md b/CHANGELOG.md index da2148b8..f8b07a69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,9 +30,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Script `benchmark_model_vs_obs.py` now uses grid inquiry functions from `grid.py` to return data nearest to a (lat,lon) location - Moved routine `get_geoschem_level_metadata` to `gcpy/benchmark/modules/benchmark_utils.py` - Refactored `get_vert_grid.py` (in `gcpy/grid.py`) to accept the `p_sfc` argument; Also never-nested the if-block logic. -- Renamed `gcpy/benchmark/modules/benchmark_models_vs_obs.py` to `benchmark_models_vs_ebas_o3.py` -- `benchmark_slurm.sh` script now saves output to a log file with the - same base name as the YAML config file +- `benchmark_slurm.sh` script now saves output to a log file with the same base name as the YAML config file +- `benchmark_models_vs_obs.py` now reads the observational data paths and metadata from `1yr_fullchem_benchmark.yml` ### Fixed - CS inquiry functions in `gcpy/cstools.py` now work properly for `xr.Dataset` and `xr.DataArray` objects diff --git a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml index 3238193a..aae845ac 100644 --- a/gcpy/benchmark/config/1yr_fullchem_benchmark.yml +++ b/gcpy/benchmark/config/1yr_fullchem_benchmark.yml @@ -37,6 +37,7 @@ paths: obs_data: ebas_o3: data_dir: /n/jacob_lab/Lab/obs_data_for_bmk/ebas_sfc_o3_2019 + data_label: "O3 (EBAS, 2019)" sondes: data_dir: /n/jacob_lab/Lab/obs_data_for_bmk/sondes_2010-2019 data_file: allozonesondes_2010-2019.csv diff --git a/gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py b/gcpy/benchmark/modules/benchmark_models_vs_obs.py similarity index 73% rename from gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py rename to gcpy/benchmark/modules/benchmark_models_vs_obs.py index 69f39507..f5b82c05 100644 --- a/gcpy/benchmark/modules/benchmark_models_vs_ebas_o3.py +++ b/gcpy/benchmark/modules/benchmark_models_vs_obs.py @@ -37,22 +37,13 @@ def read_nas( Creates data frame of O3 values converted to ppb and dictionary with key site information (name, lat, lon, altitude) - Args: - ----- - input_file : str - Path to data file with observational data (e.g. sonde data). - Keyword Args: - ------------- - verbose : bool - Toggles verbose printout on (True) or off (False). - Default value: False + Args + input_file : str : Path to observational data file + verbose : bool : Toggles verbose output on/off - Returns: - -------- - obs_dataframe : pandas.DataFrame - Dataframe containing observational data from input_file. - obs_site_coords : dict - Dictionary containing formatted site name: lon, lat and altitude. + Returns + obs_dataframe : pd.DataFrame : Observations at each station site + obs_site_coords : dict : Coords (lon/lat/alt) at each site """ verify_variable_type(input_file, str) @@ -142,21 +133,13 @@ def read_observational_data( Loops over all data files (in NASA/Ames format) within a folder and concatenates them into a single DataFrame. - Args: - ----- - path : str - Path to the observational data directory - verbose : bool - Toggles verbose printout on (True) or off (False). - Default value: False + Args + path : str : Path to the observational data dir + verbose : bool : Toggles verbose output on/off - Returns: - -------- - obs_dataframe : pandas.DataFrame - DataFrame object with the observational data (i.e. station - names, data, metadata). - obs_site_coords : dict - Dictionary with coordinates of each observation site + Returns + obs_dataframe : pd.DataFrame : Observations at each station site + obs_site_coords : dict : Coords (lon/lat/alt) at each site """ verify_variable_type(path, str) @@ -202,24 +185,13 @@ def read_model_data( "SpeciesConcVV" or "SpeciesConc". This is necessary for backwards compatitbility with GEOS-Chem output prior to version 14.1.0. - Args: - ----- - filepaths : list of str - List of data files to read. - varname : str or list of str - Variable name(s) to read from data files. - - Keyword Args: - ------------- - varbose : bool - Toggles verbose output on (True) or off (False). - Default value: False + Args + filepaths : list : List of GEOS-Chem data files to read + varname : str : Variable name(s) to read + verbose : bool : Toggles verbose output on/off - Returns: - -------- - dataarray : xarray.DataArray - DataArray object containing data read from files - specified by the filepaths argument. + Returns + dataarray : xr.DataArray : GEOS-Chem data read from disk """ # Read the Ref and Dev model data @@ -278,19 +250,14 @@ def find_times( Convert timestamps in nasa ames data files to python datetime objects and set DataFrame index to the new datetime array. - Args: - ---------- - obs_dataframe : pandas.DataFrame - DataFrame with O3 values from GAW site - start_time : str - Reference start time for timestamp taken from nasa ames file + Args + obs_dataframe : pd.DataFrame : Observations from GAW sites + start_time : str : Reference start time for obs data Returns - ------ - obs_dataframe: pandas.DataFrame - O3 in ppbV with datetime index - qcflag : pandas.Dataframe - QC flag with datetime index + obs_dataframe : pd.DataFrame : Observations (ppbv) w/ datetime index + qcflag : pd.DataFrame : Quality control info w/ datetime index + start_time : str : Reference start time for obs data """ end_time = obs_dataframe[obs_dataframe.columns[1]] time_x = [] @@ -318,29 +285,16 @@ def get_nearest_model_data_to_obs( Returns GEOS-Chem model data (on a cubed-sphere grid) at the grid box closest to an observation site location. - Args: - ----- - gc_data : xarray.DataSet - GEOS-Chem output to be processed - gc_levels: pandas.DataFrame - Altitudes of GEOS-Chem levels in meters - lon_value : float - GAW site longitude - lat_value : float - GAW site latitude - alt_value : float - GAW site altitude - - Keyword Args: (Optional) - ------------------------ - gc_cs_grid : xarray.Dataset or NoneType - Dictionary containing the cubed-sphere grid definition - (or None if gc_data is not placed on a cubed-sphere grid). + Args + gc_data : pd.DataFrame : Observations at each station site + gc_levels : pd.DataFrame : Metadata for model vertical levels + lon_value : float : Longitude of station site + lat_value : float : Latitude of station site + alt_value : float : Altitude of station site + gc_cs_grid : xr.Dataset : Metadata for Dev cubed-sphere grid - Returns: - -------- - dataframe: pandas.DataFrame - Model data closest to the observation site. + Returns + dataframe : pd.DataFrame : Model data closest to the station site """ verify_variable_type(gc_data, xr.DataArray) verify_variable_type(gc_cs_grid, (xr.Dataset, type(None))) @@ -391,39 +345,24 @@ def prepare_data_for_plot( (3) Creates the top-of-plot title for the given station site. Args: - ----- - obs_dataframe : pandas.DataFrame - Observations at each station site. - obs_site_coords : dict - Coordinates (lon, lat, alt) for each observation station site. - obs_site_name : str - Name of the observation station site. - ref_dataarray, dev_dataarray : xarray.DataArray - Data from the Ref and Dev model versions. - ref_cs_grid, dev_cs_grid : xarray.Dataset or NoneType - Dictionary containing the cubed-sphere grid definitions for - ref_dataarray and dev_dataarray (or None if ref_dataarray or - dev_dataarray are not placed on a cubed-sphere grid). - gc_level_alts_m : pandas Series - Metadata pertaining to GEOS-Chem vertical levels - - Keyword Args (Optional) - ----------------------- - varname : str - GEOS-Chem diagnostic name for the Ref and Dev model data. - Default value: "SpeciesConcVV_O3" + obs_dataframe : pd.DataFrame : Observations at each station site + obs_site_coords : dict : Coords (lon/lat/alt) at each site + obs_site_name : str : Names of station sites + ref_dataarray : xr.DataArray : Data from the Ref model version + ref_label : str : Label for the Ref model data + ref_cs_grid : xr.Dataset : Metadata for Ref cubed-sphere grid + dev_dataarray : xr.DataArray : Data from the Dev model version + dev_label : str : Label for the Dev model data + dev_cs_grid : xr.Dataset : Metadata for Dev cubed-sphere grid + gc_levels : pd.DataFrame : Metadata for model vertical levels + varname : str : Variable name for model data Returns: - -------- - obs_dataframe : pandas.DataFrame - Meanb observational data at the given station site. - ref_series, dev_series : pandas Series - Data from the Ref and Dev model versions at the - closest grid box to the observation station site. - subplot_title : str - Plot title string for the given observation station site. - subplot_ylabel : str - Label for the Y-axis (e.g. species name). + obs_dataframe : pd.DataFrame : Mean observational data @ station site + ref_series : pd.Series : Ref data nearest to the station site + dev_series : pd.Series : Dev data nearest to the station site + subplot_title : str : Title for the station site subplot + subplot_ylabel : str : Y-axis titel for the station site subplot """ verify_variable_type(obs_dataframe, pd.DataFrame) verify_variable_type(obs_site_coords, dict) @@ -496,6 +435,7 @@ def plot_single_station( subplot_title, subplot_ylabel, obs_dataframe, + obs_label, obs_site_name, ref_series, ref_label, @@ -506,26 +446,18 @@ def plot_single_station( Plots observation data vs. model data at a single station site. Args: - ----- - fig : matplotlib.figure.Figure - Matplotlib Figure object containing the plot. - rows_per_page, cols_per_page : int - Number of rows and columns on each page of the plot. - subplot_index : int - Index of the subplot on the page. Runs from 0 to - (cols_per_page * rows_per_page - 1). - subplot_title, subplot_ylabel : str - Top title and y-axis label for each subplot - obs_dataframe : pandas.DataFrame - Observational data. - obs_site_name: : str - Name of the observation station site. - ref_series, dev_series : pandas Series - GEOS-Chem data at closest grid box to the observation - station site for the Ref and Dev model versions. - ref_label, dev_label : str - Descriptive labels (e.g. version numbers) for the - GEOS-Chem Ref and Dev model versions. + obs_dataframe : mpl.figure.Figure : Figure object for the plot + rows_per_page : int : # of rows to plot on a page + cols_per_page : int : # of columns to plot on a page + subplot_index : int : Index of each subplot + subplot_title : str : Title for each subplot + subplot_ylabel : str : Y-axis title for each subplot + obs_dataframe : pd.DataFrame : Observations at each station site + obs_site_name : str : Name of the station site + ref_series : pd.Series : Data from the Ref model version + ref_label : str : Label for the Ref model data + dev_dataarray : pd.Series : Data from the Dev model version + dev_label : str : Label for the Dev model data """ verify_variable_type(fig, Figure) verify_variable_type(rows_per_page, int) @@ -562,7 +494,7 @@ def plot_single_station( marker='^', markersize=4, lw=1, - label='Surface O3 (EBAS, 2019)' + label=f"{obs_label}" ) # Plot model data @@ -620,6 +552,7 @@ def plot_single_station( def plot_one_page( pdf, obs_dataframe, + obs_label, obs_site_coords, obs_site_names, ref_dataarray, @@ -637,35 +570,19 @@ def plot_one_page( Plots a single page of models vs. observations. Args: - ----- - obs_dataframe : pandas.DataFrame - Observations at each station site. - obs_site_coords : dict - Coordinates (lon, lat, alt) for each observation station site. - obs_site_names : list of str - Names of observation station sites that fit onto a single page. - ref_dataarray, dev_dataarray : xarray.DataArray - Data from the Ref and Dev model versions. - ref_label, dev_label: str - Labels describing the Ref and Dev datasets (e.g. version numbers) - ref_cs_grid, dev_cs_grid : xarray.Dataset or NoneType - Dictionary containing the cubed-sphere grid definitions for - ref_dataarray and dev_dataarray (or None if ref_dataarray or - dev_dataarray are not placed on a cubed-sphere grid). - gc_levels : pandas.DataFrame - Metadata pertaining to GEOS-Chem vertical levels - - Keyword Args: - ------------- - rows_per_page, cols_per_page : int - Number of rows and columns to plot on a single page. - Default values: 3 rows, 3 columns - varname : str - Variable name for GEOS-Chem diagnostic data. - Default value: "SpeciesConcVV_O3" - verbose : bool - Toggles verbose printout on (True) or off (False). - Default value: False + obs_dataframe : pd.DataFrame : Observations at each station site. + obs_label : str : Label for the observational data + obs_site_coords : dict : Coords (lon/lat/alt) at each site. + obs_site_names : list : Names of station sites per page + ref_dataarray : xr.DataArray : Data from the Ref model version + ref_label : str : Label for the Ref model data + ref_cs_grid : str|None : Metadata for Ref cubed-sphere grid + dev_dataarray : xr.DataArray : Data from the Dev model version + dev_label : str : Label for the Dev model data + dev_cs_grid : str|None : Metadata for Dev cubed-sphere grid + gc_levels : pd.DataFrame : Metadata for model vertical levels + rows_per_page : int : Number of rows to plot on a page + varname : str : Variable name for model data """ verify_variable_type(obs_dataframe, pd.DataFrame) verify_variable_type(obs_site_coords, dict) @@ -712,6 +629,7 @@ def plot_one_page( subplot_title, # str subplot_ylabel, # str obs_dataframe, # pandas.Dataframe + obs_label, # str obs_site_name, # str ref_series, # pandas.Series ref_label, # str @@ -739,6 +657,7 @@ def plot_one_page( def plot_models_vs_obs( obs_dataframe, + obs_label, obs_site_coords, ref_dataarray, ref_label, @@ -747,35 +666,21 @@ def plot_models_vs_obs( gc_levels, varname="SpeciesConcVV_O3", dst="./benchmark", - **kwargs ): """ Plots models vs. observations using a 3 rows x 3 column layout. Args: - ----- - obs_dataframe : pandas.DataFrame - Observations at each station site. - obs_site_coords : dict - Coordinates (lon, lat, alt) for each observation station site. - ref_dataarray, dev_dataarray : xarray.DataArray - Data from the Ref and Dev model versions. - ref_label, dev_label: str - Labels describing the Ref and Dev datasets (e.g. version numbers) - gc_levels : pandas.DataFrame - Metadata pertaining to GEOS-Chem vertical levels - - Keyword Args: - ------------- - varname : str - Variable name for GEOS-Chem diagnostic data. - Default value: "SpeciesConcVV_O3" - dst : str - Root folder where output will be created. - Default value: "./benchmark" - verbose : bool - Toggles verbose printout on (True) or off (False). - Default value: False + obs_dataframe : pd.DataFrame : Observations at each station site. + obs_label : str : Label for the observational data + obs_site_coords : dict : Coords (lon/lat/alt) at each site. + ref_dataarray : xr.DataArray : Data from the Ref model version + ref_label : str : Label for the Ref model data + dev_dataarray : xr.DataArray : Data from the Dev model version + dev_label : str : Label for the Dev model data + gc_levels : pd.DataFrame : Metadata for model vertical levels + varname : str : Variable name for model data + dst : str : Destination folder for plots """ verify_variable_type(obs_dataframe, pd.DataFrame) verify_variable_type(obs_site_coords, dict) @@ -820,6 +725,7 @@ def plot_models_vs_obs( plot_one_page( pdf, # PdfPages obs_dataframe, # pandas.DataFrame + obs_label, # str obs_site_coords, # dict obs_site_names[start:end+1], # list of str ref_dataarray, # xarray.DataArray @@ -842,8 +748,9 @@ def plot_models_vs_obs( plt.style.use("default") -def make_benchmark_models_vs_ebas_o3_plots( +def make_benchmark_models_vs_obs_plots( obs_filepaths, + obs_label, ref_filepaths, ref_label, dev_filepaths, @@ -857,28 +764,16 @@ def make_benchmark_models_vs_ebas_o3_plots( Driver routine to create model vs. observation plots. Args: - ----- - obs_filepaths : str or list - Path(s) to the observational data. - ref_filepaths, dev_filepaths: str or list - Path(s) to the Ref and Dev model versions to be compared. - ref_label, dev_label : str - Descriptive labels (e.g. for version numbers) for the - Ref and Dev model data. - - Keyword Args (optional): - ------------------------ - varname : str - Variable name for model data to be plotted against - observations. Default value: "SpeciesConcVV_O3". - dst : str - Path to the root folder where plots will be created. - verbose : bool - Toggles verbose printout on (True) or off (False). - Default value: False - overwrite : bool - Toggles whether plots should be overwritten (True) - or not (False). Default value: True + obs_filepaths : str|list : Path(s) to the observational data. + obs_label : str : Label for the observational data + ref_filepaths : str : Paths to the Ref model data + ref_label : str : Label for the Ref model data + dev_filepaths : str : Paths to the Dev model data + dev_label : str : Label for the Dev model data + varname : str : Variable name for model data + dst : str : Destination folder for plots + verbose : bool : Toggles verbose output on/off + overwrite : bool : Toggles overwriting contents of dst """ verify_variable_type(obs_filepaths, (str, list)) verify_variable_type(ref_filepaths, (str, list)) @@ -917,6 +812,7 @@ def make_benchmark_models_vs_ebas_o3_plots( # Plot data vs observations plot_models_vs_obs( obs_dataframe, # pandas.DataFrame + obs_label, # str obs_site_coords, # dict ref_dataarray, # xarray.DataArray ref_label, # str @@ -925,5 +821,4 @@ def make_benchmark_models_vs_ebas_o3_plots( gc_levels, # pandas.DataFrame varname=varname, # str dst=dst, # str - verbose=verbose # bool ) diff --git a/gcpy/benchmark/modules/benchmark_models_vs_sondes.py b/gcpy/benchmark/modules/benchmark_models_vs_sondes.py index f0159a9f..71c3e3d2 100644 --- a/gcpy/benchmark/modules/benchmark_models_vs_sondes.py +++ b/gcpy/benchmark/modules/benchmark_models_vs_sondes.py @@ -294,7 +294,7 @@ def page_adjustments(fig): """ Adjusts the page settings after all the subplots have been made. - Args: + Args fig : mpl.Figure : Figure object """ @@ -494,7 +494,7 @@ def make_benchmark_models_vs_sondes_plots( Creates plots of sonde data vs. GEOS-Chem output. For use in the 1-year benchmark plotting workflow. - Args: + Args obs_data_file : str : File containing sonde data obs_site_file : str : File containing sonde site metadata ref_filepaths : str|list : Files for the GEOS-Chem Ref version diff --git a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py index ccaa1fd7..67748f34 100644 --- a/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py +++ b/gcpy/benchmark/modules/run_1yr_fullchem_benchmark.py @@ -63,8 +63,8 @@ import gcpy.budget_ox as ox from gcpy import benchmark_funcs as bmk import gcpy.benchmark.modules.benchmark_utils as bmk_util -from gcpy.benchmark.modules.benchmark_models_vs_ebas_o3 \ - import make_benchmark_models_vs_ebas_o3_plots +from gcpy.benchmark.modules.benchmark_models_vs_obs \ + import make_benchmark_models_vs_obs_plots from gcpy.benchmark.modules.benchmark_models_vs_sondes \ import make_benchmark_models_vs_sondes_plots #TODO: Peel out routines from benchmark_funcs.py into smaller @@ -913,9 +913,10 @@ def gcc_vs_gcc_ops_budg(mon): all_months_dev )[0] - # Plot models vs. EBAS O3 observations - make_benchmark_models_vs_ebas_o3_plots( + # Plot models vs. observations + make_benchmark_models_vs_obs_plots( config["paths"]["obs_data"]["ebas_o3"]["data_dir"], + config["paths"]["obs_data"]["ebas_o3"]["data_label"], ref, config["data"]["ref"]["gcc"]["version"], dev, @@ -1561,9 +1562,10 @@ def gchp_vs_gcc_ops_budg(mon): is_gchp=True )[0] - # Plot models vs. EBAS O3 observations - make_benchmark_models_vs_ebas_o3_plots( + # Plot models vs. observations + make_benchmark_models_vs_obs_plots( config["paths"]["obs_data"]["ebas_o3"]["data_dir"], + config["paths"]["obs_data"]["ebas_o3"]["data_label"], ref, config["data"]["dev"]["gcc"]["version"], dev, @@ -2242,9 +2244,10 @@ def gchp_vs_gchp_ops_budg(mon): is_gchp=True )[0] - # Plot models vs. EBAS O3 observations - make_benchmark_models_vs_ebas_o3_plots( + # Plot models vs. observations + make_benchmark_models_vs_obs_plots( config["paths"]["obs_data"]["ebas_o3"]["data_dir"], + config["paths"]["obs_data"]["ebas_o3"]["data_label"], ref, config["data"]["ref"]["gchp"]["version"], dev,