Skip to content

Commit

Permalink
Merge pull request #214 from NREL/qdm_smooth
Browse files Browse the repository at this point in the history
Applying fill_and_smooth on QDM
  • Loading branch information
castelao authored May 6, 2024
2 parents 58691ac + 4c188ef commit c543d2e
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 82 deletions.
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ indent-width = 4
target-version = "py38"

[tool.ruff.lint]
fixable = ["ALL"]
preview = true
select = [
"E", # pycodestyle
]
ignore = [
"B008", # function-call-in-default-argument
"B024", # abstract-base-class-without-abstract-method
Expand Down
175 changes: 93 additions & 82 deletions sup3r/bias/bias_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,91 @@ def _reduce_base_data(base_ti, base_data, base_cs_ghi, base_dset,
return base_data, daily_ti


class LinearCorrection(DataRetrievalBase):
class FillAndSmoothMixin:
    """Fill and extend parameters for calibration on missing positions

    Host classes must provide a ``bad_bias_gids`` attribute; it is only
    read here to log how many bias gids were flagged as out of bounds.
    """

    def fill_and_smooth(self,
                        out,
                        fill_extend=True,
                        smooth_extend=0,
                        smooth_interior=0):
        """For a given set of parameters, fill and extend missing positions

        Fill data extending beyond the base meta data extent by doing a
        nearest neighbor gap fill. Smooth interior and extended region with
        given smoothing values.
        Interior smoothing can reduce the effect of extreme values
        within aggregations over large number of pixels.
        The interior is assumed to be defined by the region without nan
        values. The extended region is assumed to be the region with nan
        values. Both regions are determined once per dataset, from the first
        slice along the last axis, and reused for every other slice.

        Parameters
        ----------
        out : dict
            Dictionary of values defining the mean/std of the bias + base
            data and the scalar + adder factors to correct the biased data
            like: bias_data * scalar + adder. Each value is of shape
            (lat, lon, time). The arrays are modified in place.
        fill_extend : bool
            Whether to fill data extending beyond the base meta data with
            nearest neighbor values.
        smooth_extend : float
            Option to smooth the scalar/adder data outside of the spatial
            domain set by the threshold input. This alleviates the weird seams
            far from the domain of interest. This value is the standard
            deviation for the gaussian_filter kernel
        smooth_interior : float
            Value to use to smooth the scalar/adder data inside of the spatial
            domain set by the threshold input. This can reduce the effect of
            extreme values within aggregations over large number of pixels.
            This value is the standard deviation for the gaussian_filter
            kernel.

        Returns
        -------
        out : dict
            The same dictionary object that was passed in, with its arrays
            filled/smoothed in place. Each value is of shape
            (lat, lon, time).
        """
        # len() rather than truthiness: bad_bias_gids may be an array-like
        # whose truth value is ambiguous -- TODO confirm its type.
        if len(self.bad_bias_gids) > 0:
            logger.info('Found {} bias gids that are out of bounds: {}'
                        .format(len(self.bad_bias_gids), self.bad_bias_gids))

        for key, arr in out.items():
            n_steps = arr.shape[-1]
            if n_steps == 0:
                # Nothing to fill or smooth, and arr[..., 0] below would
                # raise IndexError on an empty last axis.
                continue

            # Extended (NaN) vs interior (valid) regions are fixed by the
            # first slice and shared by all slices of this dataset.
            nan_mask = np.isnan(arr[..., 0])
            interior_mask = ~nan_mask

            for idt in range(n_steps):

                arr_smooth = arr[..., idt]

                # Interior smoothing always triggers the fill so that the
                # gaussian kernel does not pick up NaN values at the border.
                needs_fill = (np.isnan(arr_smooth).any()
                              and fill_extend) or smooth_interior > 0

                if needs_fill:
                    logger.info('Filling NaN values outside of valid spatial '
                                'extent for dataset "{}" for timestep {}'
                                .format(key, idt))
                    arr_smooth = nn_fill_array(arr_smooth)

                arr_smooth_int = arr_smooth_ext = arr_smooth

                if smooth_extend > 0:
                    arr_smooth_ext = gaussian_filter(arr_smooth_ext,
                                                     smooth_extend,
                                                     mode='nearest')

                if smooth_interior > 0:
                    arr_smooth_int = gaussian_filter(arr_smooth_int,
                                                     smooth_interior,
                                                     mode='nearest')

                # Each region takes its values from its own smoothed field.
                out[key][nan_mask, idt] = arr_smooth_ext[nan_mask]
                out[key][interior_mask, idt] = arr_smooth_int[interior_mask]

        return out


class LinearCorrection(FillAndSmoothMixin, DataRetrievalBase):
"""Calculate linear correction *scalar +adder factors to bias correct data
This calculation operates on single bias sites for the full time series of
Expand Down Expand Up @@ -820,85 +904,6 @@ def _run_single(cls,
base_dset)
return out

def fill_and_smooth(self,
                    out,
                    fill_extend=True,
                    smooth_extend=0,
                    smooth_interior=0):
    """Fill data extending beyond the base meta data extent by doing a
    nearest neighbor gap fill. Smooth interior and extended region with
    given smoothing values.
    Interior smoothing can reduce the effect of extreme values
    within aggregations over large number of pixels.
    The interior is assumed to be defined by the region without nan values.
    The extended region is assumed to be the region with nan values.
    Both regions are taken from the first slice along the last axis of each
    array and reused for all remaining slices.

    Parameters
    ----------
    out : dict
        Dictionary of values defining the mean/std of the bias + base
        data and the scalar + adder factors to correct the biased data
        like: bias_data * scalar + adder. Each value is of shape
        (lat, lon, time). The arrays are modified in place.
    fill_extend : bool
        Whether to fill data extending beyond the base meta data with
        nearest neighbor values.
    smooth_extend : float
        Option to smooth the scalar/adder data outside of the spatial
        domain set by the threshold input. This alleviates the weird seams
        far from the domain of interest. This value is the standard
        deviation for the gaussian_filter kernel
    smooth_interior : float
        Value to use to smooth the scalar/adder data inside of the spatial
        domain set by the threshold input. This can reduce the effect of
        extreme values within aggregations over large number of pixels.
        This value is the standard deviation for the gaussian_filter
        kernel.

    Returns
    -------
    out : dict
        The same dictionary object that was passed in, with its arrays
        filled/smoothed in place. Each value is of shape
        (lat, lon, time).
    """
    # Reads self.bad_bias_gids only for this informational message.
    if len(self.bad_bias_gids) > 0:
        logger.info('Found {} bias gids that are out of bounds: {}'
                    .format(len(self.bad_bias_gids), self.bad_bias_gids))

    for key, arr in out.items():
        if arr.shape[-1] == 0:
            # An empty last axis has nothing to process; indexing its
            # first slice below would raise IndexError.
            continue

        # Region masks depend only on the first slice, so build them once
        # per dataset rather than once per timestep.
        nan_mask = np.isnan(arr[..., 0])
        valid_mask = ~nan_mask

        for idt in range(arr.shape[-1]):

            arr_smooth = arr[..., idt]

            # Requesting interior smoothing forces the fill as well, so the
            # gaussian kernel is not applied across NaN values.
            needs_fill = (np.isnan(arr_smooth).any()
                          and fill_extend) or smooth_interior > 0

            if needs_fill:
                logger.info('Filling NaN values outside of valid spatial '
                            'extent for dataset "{}" for timestep {}'
                            .format(key, idt))
                arr_smooth = nn_fill_array(arr_smooth)

            arr_smooth_int = arr_smooth_ext = arr_smooth

            if smooth_extend > 0:
                arr_smooth_ext = gaussian_filter(arr_smooth_ext,
                                                 smooth_extend,
                                                 mode='nearest')

            if smooth_interior > 0:
                arr_smooth_int = gaussian_filter(arr_smooth_int,
                                                 smooth_interior,
                                                 mode='nearest')

            # Extended region <- extend-smoothed field,
            # interior region <- interior-smoothed field.
            out[key][nan_mask, idt] = arr_smooth_ext[nan_mask]
            out[key][valid_mask, idt] = arr_smooth_int[valid_mask]

    return out

def write_outputs(self, fp_out, out):
"""Write outputs to an .h5 file.
Expand Down Expand Up @@ -1176,7 +1181,7 @@ def get_linear_correction(bias_data, base_data, bias_feature, base_dset):
return out


class QuantileDeltaMappingCorrection(DataRetrievalBase):
class QuantileDeltaMappingCorrection(FillAndSmoothMixin, DataRetrievalBase):
"""Estimate probability distributions required by Quantile Delta Mapping
The main purpose of this class is to estimate the probability
Expand Down Expand Up @@ -1457,7 +1462,10 @@ def write_outputs(self, fp_out, out=None):
def run(self,
fp_out=None,
max_workers=None,
daily_reduction='avg'):
daily_reduction='avg',
fill_extend=True,
smooth_extend=0,
smooth_interior=0):
"""Estimate the statistical distributions for each location
Parameters
Expand Down Expand Up @@ -1572,6 +1580,9 @@ def run(self,

logger.info('Finished calculating bias correction factors.')

self.out = self.fill_and_smooth(self.out, fill_extend, smooth_extend,
smooth_interior)

self.write_outputs(fp_out, self.out)

return copy.deepcopy(self.out)
Expand Down
21 changes: 21 additions & 0 deletions tests/bias/test_qdm_bias_correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,27 @@ def test_parallel(fp_fut_cc):
), f"Different results for {k}"


def test_fill_nan(fp_fut_cc):
    """No NaN when running with fill_extend"""

    c = QuantileDeltaMappingCorrection(FP_NSRDB, FP_CC, fp_fut_cc,
                                       'ghi', 'rsds',
                                       target=TARGET, shape=SHAPE,
                                       distance_upper_bound=0.7,
                                       bias_handler='DataHandlerNCforCC')

    # Without filling, at least one NaN or this test is useless.
    out = c.run(fill_extend=False)
    # Reduce each array on its own so parameters with different shapes are
    # never stacked into a single (possibly ragged) ndarray.
    assert all(np.isnan(v).any() for v in out.values()), (
        "Assume at least one NaN value for each param"
    )

    out = c.run()
    # `not any(...)` instead of bitwise `~` on a reduction result: it stays
    # correct whether the reduction yields a numpy or a builtin bool.
    assert not any(np.isnan(v).any() for v in out.values()), (
        "All NaN values where supposed to be filled"
    )


def test_save_file(tmp_path, fp_fut_cc):
"""Save valid output
Expand Down

0 comments on commit c543d2e

Please sign in to comment.