Skip to content

Commit

Permalink
Merge pull request #195 from NREL/gb/precip_skill
Browse files Browse the repository at this point in the history
Gb/precip skill
  • Loading branch information
grantbuster authored Mar 28, 2024
2 parents 986a1fe + 226b67f commit d710fe3
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 12 deletions.
94 changes: 84 additions & 10 deletions sup3r/bias/bias_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def __init__(self,
bias_handler='DataHandlerNCforCC',
base_handler_kwargs=None,
bias_handler_kwargs=None,
decimals=None):
decimals=None,
match_zero_rate=False):
"""
Parameters
----------
Expand Down Expand Up @@ -94,6 +95,15 @@ class to be retrieved from the rex/sup3r library. If a
decimals, this gets passed to np.around(). If decimals
is negative, it specifies the number of positions to
the left of the decimal point.
match_zero_rate : bool
Option to fix the frequency of zero values in the biased data. The
lowest percentile of values in the biased data will be set to zero
to match the percentile of zeros in the base data. If
SkillAssessment is being run and this is True, the distributions
will not be mean-centered. This helps resolve the issue where
global climate models produce too many days with small
precipitation totals e.g., the "drizzle problem".
Ref: Polade et al., 2014 https://doi.org/10.1038/srep04364
"""

logger.info('Initializing DataRetrievalBase for base dset "{}" '
Expand All @@ -110,6 +120,7 @@ class to be retrieved from the rex/sup3r library. If a
self.bias_handler_kwargs = bias_handler_kwargs or {}
self.bad_bias_gids = []
self._distance_upper_bound = distance_upper_bound
self.match_zero_rate = match_zero_rate

if isinstance(self.base_fps, str):
self.base_fps = sorted(glob(self.base_fps))
Expand Down Expand Up @@ -367,6 +378,7 @@ def get_bias_data(self, bias_gid):
bias_data : np.ndarray
1D array of temporal data at the requested gid.
"""

idx = np.where(self.bias_gid_raster == bias_gid)
if self.bias_dh.data is None:
self.bias_dh.load_cached_data()
Expand Down Expand Up @@ -486,6 +498,46 @@ def get_base_data(cls,

return out_data, out_ti

@staticmethod
def _match_zero_rate(bias_data, base_data):
"""The lowest percentile of values in the biased data will be set to
zero to match the percentile of zeros in the base data. This helps
resolve the issue where global climate models produce too many days
with small precipitation totals e.g., the "drizzle problem".
Ref: Polade et al., 2014 https://doi.org/10.1038/srep04364
Parameters
----------
bias_data : np.ndarray
1D array of biased data observations.
base_data : np.ndarray
1D array of base data observations.
Returns
-------
bias_data : np.ndarray
1D array of biased data observations. Values below the quantile
associated with zeros in base_data will be set to zero
"""

q_zero_base_in = np.nanmean(base_data == 0)
q_zero_bias_in = np.nanmean(bias_data == 0)

q_bias = np.linspace(0, 1, len(bias_data))
min_value_bias = np.interp(q_zero_base_in, q_bias, sorted(bias_data))

bias_data[bias_data < min_value_bias] = 0

q_zero_base_out = np.nanmean(base_data == 0)
q_zero_bias_out = np.nanmean(bias_data == 0)

logger.debug('Input bias/base zero rate is {:.3e}/{:.3e}, '
'output is {:.3e}/{:.3e}'
.format(q_zero_bias_in, q_zero_base_in,
q_zero_bias_out, q_zero_base_out))

return bias_data

@staticmethod
def _read_base_sup3r_data(dh, base_dset, base_gid):
"""Read baseline data from a sup3r DataHandler
Expand Down Expand Up @@ -727,7 +779,8 @@ def _run_single(cls,
daily_reduction,
bias_ti,
decimals,
base_dh_inst=None):
base_dh_inst=None,
match_zero_rate=False):
"""Find the nominal scalar + adder combination to bias correct data
at a single site"""

Expand All @@ -739,6 +792,9 @@ def _run_single(cls,
decimals=decimals,
base_dh_inst=base_dh_inst)

if match_zero_rate:
bias_data = cls._match_zero_rate(bias_data, base_data)

out = cls.get_linear_correction(bias_data, base_data, bias_feature,
base_dset)
return out
Expand Down Expand Up @@ -931,6 +987,7 @@ def run(self,
self.bias_ti,
self.decimals,
base_dh_inst=self.base_dh,
match_zero_rate=self.match_zero_rate,
)
for key, arr in single_out.items():
self.out[key][raster_loc] = arr
Expand Down Expand Up @@ -963,6 +1020,7 @@ def run(self,
daily_reduction,
self.bias_ti,
self.decimals,
match_zero_rate=self.match_zero_rate,
)
futures[future] = raster_loc

Expand Down Expand Up @@ -1006,7 +1064,8 @@ def _run_single(cls,
daily_reduction,
bias_ti,
decimals,
base_dh_inst=None):
base_dh_inst=None,
match_zero_rate=False):
"""Find the nominal scalar + adder combination to bias correct data
at a single site"""

Expand All @@ -1018,6 +1077,9 @@ def _run_single(cls,
decimals=decimals,
base_dh_inst=base_dh_inst)

if match_zero_rate:
bias_data = cls._match_zero_rate(bias_data, base_data)

base_arr = np.full(cls.NT, np.nan, dtype=np.float32)
out = {}

Expand Down Expand Up @@ -1117,10 +1179,12 @@ def _init_out(self):
f'bias_{self.bias_feature}_std',
f'bias_{self.bias_feature}_skew',
f'bias_{self.bias_feature}_kurtosis',
f'bias_{self.bias_feature}_zero_rate',
f'base_{self.base_dset}_mean',
f'base_{self.base_dset}_std',
f'base_{self.base_dset}_skew',
f'base_{self.base_dset}_kurtosis',
f'base_{self.base_dset}_zero_rate',
]

self.out = {k: np.full((*self.bias_gid_raster.shape, self.NT),
Expand All @@ -1138,13 +1202,17 @@ def _init_out(self):
self.out[bias_k] = arr.copy()

@classmethod
def _run_skill_eval(cls, bias_data, base_data, bias_feature, base_dset):
def _run_skill_eval(cls, bias_data, base_data, bias_feature, base_dset,
match_zero_rate=False):
"""Run skill assessment metrics on 1D datasets at a single site.
Note we run the KS test on the mean=0 distributions as per:
S. Brands et al. 2013 https://doi.org/10.1007/s00382-013-1742-8
"""

if match_zero_rate:
bias_data = cls._match_zero_rate(bias_data, base_data)

out = {}
bias_mean = np.nanmean(bias_data)
base_mean = np.nanmean(base_data)
Expand All @@ -1154,13 +1222,20 @@ def _run_skill_eval(cls, bias_data, base_data, bias_feature, base_dset):
out[f'bias_{bias_feature}_std'] = np.nanstd(bias_data)
out[f'bias_{bias_feature}_skew'] = stats.skew(bias_data)
out[f'bias_{bias_feature}_kurtosis'] = stats.kurtosis(bias_data)
out[f'bias_{bias_feature}_zero_rate'] = np.nanmean(bias_data == 0)

out[f'base_{base_dset}_mean'] = base_mean
out[f'base_{base_dset}_std'] = np.nanstd(base_data)
out[f'base_{base_dset}_skew'] = stats.skew(base_data)
out[f'base_{base_dset}_kurtosis'] = stats.kurtosis(base_data)
out[f'base_{base_dset}_zero_rate'] = np.nanmean(base_data == 0)

if match_zero_rate:
ks_out = stats.ks_2samp(base_data, bias_data)
else:
ks_out = stats.ks_2samp(base_data - base_mean,
bias_data - bias_mean)

ks_out = stats.ks_2samp(base_data - base_mean, bias_data - bias_mean)
out[f'{bias_feature}_ks_stat'] = ks_out.statistic
out[f'{bias_feature}_ks_p'] = ks_out.pvalue

Expand All @@ -1175,7 +1250,7 @@ def _run_skill_eval(cls, bias_data, base_data, bias_feature, base_dset):
@classmethod
def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
base_gid, base_handler, daily_reduction, bias_ti,
decimals, base_dh_inst=None):
decimals, base_dh_inst=None, match_zero_rate=False):
"""Do a skill assessment at a single site"""

base_data, base_ti = cls.get_base_data(base_fps, base_dset,
Expand All @@ -1189,12 +1264,11 @@ def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
f'bias_{bias_feature}_std_monthly': arr.copy(),
f'base_{base_dset}_mean_monthly': arr.copy(),
f'base_{base_dset}_std_monthly': arr.copy(),
f'{bias_feature}_scalar': arr.copy(),
f'{bias_feature}_adder': arr.copy(),
}

out.update(cls._run_skill_eval(bias_data, base_data,
bias_feature, base_dset))
bias_feature, base_dset,
match_zero_rate=match_zero_rate))

for month in range(1, 13):
bias_mask = bias_ti.month == month
Expand All @@ -1208,6 +1282,6 @@ def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
for k, v in mout.items():
if not k.endswith(('_scalar', '_adder')):
k += '_monthly'
out[k][month - 1] = v
out[k][month - 1] = v

return out
44 changes: 42 additions & 2 deletions tests/bias/test_bias_correction.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""pytests bias correction calculations"""
import os
import shutil
import tempfile

import h5py
Expand Down Expand Up @@ -526,7 +527,46 @@ def test_nc_base_file():

out = calc.run(fill_extend=True, max_workers=1)

assert (out['rsds_scalar'] == 1).all()
assert (out['rsds_adder'] == 0).all()
assert np.allclose(out['base_rsds_mean_monthly'],
out['bias_rsds_mean_monthly'])
assert np.allclose(out['base_rsds_mean'], out['bias_rsds_mean'])
assert np.allclose(out['base_rsds_std'], out['bias_rsds_std'])


def test_match_zero_rate():
"""Test feature to match the rate of zeros in the bias data based on the
zero rate in the base data. Useful for precip where GCMs have a low-precip
"drizzle" problem."""
bias_data = np.random.uniform(0, 1, 1000)
base_data = np.random.uniform(0, 1, 1000)
base_data[base_data < 0.1] = 0

skill = SkillAssessment._run_skill_eval(bias_data, base_data, 'f1', 'f1')
assert skill['bias_f1_zero_rate'] != skill['base_f1_zero_rate']
assert (bias_data == 0).mean() != (base_data == 0).mean()

skill = SkillAssessment._run_skill_eval(bias_data, base_data, 'f1', 'f1',
match_zero_rate=True)
assert (bias_data == 0).mean() == (base_data == 0).mean()
assert skill['bias_f1_zero_rate'] == skill['base_f1_zero_rate']
for p in (1, 5, 25, 50, 75, 95, 99):
assert np.allclose(skill[f'base_f1_percentile_{p}'],
np.percentile(base_data, p))

with tempfile.TemporaryDirectory() as td:
fp_nsrdb_temp = os.path.join(td, os.path.basename(FP_NSRDB))
shutil.copy(FP_NSRDB, fp_nsrdb_temp)
with h5py.File(fp_nsrdb_temp, 'a') as nsrdb_temp:
ghi = nsrdb_temp['ghi'][...]
ghi[:1000, :] = 0
nsrdb_temp['ghi'][...] = ghi
calc = SkillAssessment(fp_nsrdb_temp, FP_CC, 'ghi', 'rsds',
target=TARGET, shape=SHAPE,
distance_upper_bound=0.7,
bias_handler='DataHandlerNCforCC',
match_zero_rate=True)
out = calc.run(fill_extend=True, max_workers=1)

bias_rate = out['bias_rsds_zero_rate']
base_rate = out['base_ghi_zero_rate']
assert np.allclose(bias_rate, base_rate, rtol=0.005)

0 comments on commit d710fe3

Please sign in to comment.