From c21ed3ca310cb5fe0b6dd040eb5cb7e2f71ab467 Mon Sep 17 00:00:00 2001 From: "Brandon N. Benton" Date: Tue, 3 Sep 2024 21:05:39 -0600 Subject: [PATCH] Added time_shift argument to deriver and data handler so that the time index of daily data can be shifted to start at the beginning of the day instead of at noon. GCM data frequently stamps daily data at noon instead of the beginning of the day. This caused the solar module to conclude that the given GAN data had 48 time steps, since the time index had two unique day values, even though there were only 24 time steps from noon to noon on each day. --- sup3r/bias/bias_calc_vortex.py | 12 ++++++------ sup3r/preprocessing/data_handlers/factory.py | 9 ++++++++- sup3r/preprocessing/derivers/base.py | 14 +++++++++++++- sup3r/solar/solar_cli.py | 6 +++++- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/sup3r/bias/bias_calc_vortex.py b/sup3r/bias/bias_calc_vortex.py index c63ffd8a2e..86fc90bad2 100644 --- a/sup3r/bias/bias_calc_vortex.py +++ b/sup3r/bias/bias_calc_vortex.py @@ -12,12 +12,12 @@ import dask import numpy as np import pandas as pd +import xarray as xr from rex import Resource from scipy.interpolate import interp1d from sup3r.postprocessing import OutputHandler, RexOutputs from sup3r.utilities import VERSION_RECORD -from sup3r.utilities.utilities import xr_open_mfdataset logger = logging.getLogger(__name__) @@ -114,7 +114,7 @@ def convert_month_height_tif(self, month, height): os.remove(outfile) if not os.path.exists(outfile) or self.overwrite: - ds = xr_open_mfdataset(infile) + ds = xr.open_mfdataset(infile) ds = ds.rename( { 'band_data': f'windspeed_{height}m', @@ -142,7 +142,7 @@ def convert_all_tifs(self): def mask(self): """Mask coordinates without data""" if self._mask is None: - with xr_open_mfdataset(self.get_height_files('January')) as res: + with xr.open_mfdataset(self.get_height_files('January')) as res: mask = (res[self.in_features[0]] != -999) & ( ~np.isnan(res[self.in_features[0]]) ) 
@@ -173,13 +173,13 @@ def get_month(self, month): if os.path.exists(month_file) and not self.overwrite: logger.info(f'Loading month_file {month_file}.') - data = xr_open_mfdataset(month_file) + data = xr.open_mfdataset(month_file) else: logger.info( 'Getting mean windspeed for all heights ' f'({self.in_heights}) for {month}' ) - data = xr_open_mfdataset(self.get_height_files(month)) + data = xr.open_mfdataset(self.get_height_files(month)) logger.info( 'Interpolating windspeed for all heights ' f'({self.out_heights}) for {month}.' @@ -239,7 +239,7 @@ def interp(self, data): def get_lat_lon(self): """Get lat lon grid""" - with xr_open_mfdataset(self.get_height_files('January')) as res: + with xr.open_mfdataset(self.get_height_files('January')) as res: lons, lats = np.meshgrid( res['longitude'].values, res['latitude'].values ) diff --git a/sup3r/preprocessing/data_handlers/factory.py b/sup3r/preprocessing/data_handlers/factory.py index 19428cb089..b73a2d5ae6 100644 --- a/sup3r/preprocessing/data_handlers/factory.py +++ b/sup3r/preprocessing/data_handlers/factory.py @@ -50,6 +50,7 @@ def __init__( time_slice: Union[slice, tuple, list, None] = slice(None), threshold: Optional[float] = None, time_roll: int = 0, + time_shift: Optional[int] = None, hr_spatial_coarsen: int = 1, nan_method_kwargs: Optional[dict] = None, BaseLoader: Optional[Callable] = None, @@ -91,8 +92,13 @@ def __init__( are more than this value away from the target lat/lon, an error is raised. time_roll : int - Number of steps to shift the time axis. `Passed to + Number of steps to roll along the time axis. `Passed to xr.Dataset.roll()` + time_shift : int | None + Number of minutes to shift time axis. This can be used, for + example, to shift the time index for daily data so that the time + stamp for a given day starts at the zeroth minute instead of at + noon, as is the case for most GCM data. hr_spatial_coarsen : int Spatial coarsening factor. 
Passed to `xr.Dataset.coarsen()` nan_method_kwargs : str | dict | None @@ -145,6 +151,7 @@ def __init__( data=self.rasterizer.data, features=features, time_roll=time_roll, + time_shift=time_shift, hr_spatial_coarsen=hr_spatial_coarsen, nan_method_kwargs=nan_method_kwargs, FeatureRegistry=FeatureRegistry, diff --git a/sup3r/preprocessing/derivers/base.py b/sup3r/preprocessing/derivers/base.py index 09baa62251..c8e851fad3 100644 --- a/sup3r/preprocessing/derivers/base.py +++ b/sup3r/preprocessing/derivers/base.py @@ -319,6 +319,7 @@ def __init__( data: Union[Sup3rX, Sup3rDataset], features, time_roll=0, + time_shift=None, hr_spatial_coarsen=1, nan_method_kwargs=None, FeatureRegistry=None, @@ -332,8 +333,13 @@ def __init__( features: list List of features to derive time_roll: int - Number of steps to shift the time axis. `Passed to + Number of steps to roll along the time axis. `Passed to xr.Dataset.roll()` + time_shift: int | None + Number of minutes to shift time axis. This can be used, for + example, to shift the time index for daily data so that the time + stamp for a given day starts at the zeroth minute instead of at + noon, as is the case for most GCM data. hr_spatial_coarsen: int Spatial coarsening factor. Passed to `xr.Dataset.coarsen()` nan_method_kwargs: str | dict | None @@ -358,6 +364,12 @@ def __init__( logger.debug('Applying time_roll=%s to data array', time_roll) self.data = self.data.roll(**{Dimension.TIME: time_roll}) + if time_shift is not None: + logger.debug('Applying time_shift=%s to time index', time_shift) + self.data.time_index = self.data.time_index.shift( + time_shift, freq='min' + ) + if hr_spatial_coarsen > 1: logger.debug( 'Applying hr_spatial_coarsen=%s to data.', hr_spatial_coarsen diff --git a/sup3r/solar/solar_cli.py b/sup3r/solar/solar_cli.py index c74f9c59bc..797445078b 100644 --- a/sup3r/solar/solar_cli.py +++ b/sup3r/solar/solar_cli.py @@ -1,4 +1,8 @@ -"""sup3r solar CLI entry points.""" +"""sup3r solar CLI entry points. 
+ +TODO: This should be modified to enable distribution of file groups across +nodes instead of requesting a node for a single file +""" import copy import logging import os