Skip to content

Commit

Permalink
Add initial bootstrapping
Browse files Browse the repository at this point in the history
  • Loading branch information
paulf81 committed Oct 18, 2023
1 parent d4a17cc commit 1d6c8b9
Show file tree
Hide file tree
Showing 3 changed files with 186 additions and 61 deletions.
2 changes: 1 addition & 1 deletion flasc/energy_ratio/energy_ratio.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def _compute_energy_ratio_single(df_,
if len(df_) == 0:
raise RuntimeError("After removing nulls, no data remains for computation.")

# Apply binning to dataframe
# Apply binning to dataframe and group by bin
df_ = util.bin_and_group_dataframe(df_, ref_cols, test_cols, wd_cols, ws_cols, wd_step, wd_min,
wd_max, ws_step, ws_min, ws_max, wd_bin_overlap_radius,
remove_all_nulls, bin_cols_without_df_name, num_df)
Expand Down
206 changes: 147 additions & 59 deletions flasc/energy_ratio/total_uplift.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def _compute_total_uplift_single(
):

"""
Compute the energy ratio between two sets of turbines.
Compute the total change in energy production between two sets of turbines.
Args:
df_ (pl.DataFrame): A dataframe containing the data to use in the calculation.
Expand Down Expand Up @@ -74,7 +74,7 @@ def _compute_total_uplift_single(
must be available to compute the bin. Defaults to False.
Returns:
pl.DataFrame: A dataframe containing the energy ratio for each wind direction bin
dict: A dictionary with results indexed for each element of uplift_names
pl.DataFrame: A dataframe containing the weights for each wind direction and wind speed bin
"""

Expand Down Expand Up @@ -129,6 +129,124 @@ def _compute_total_uplift_single(

return total_uplift_result, df_freq_pl


# Bootstrap function wraps the _compute_total_uplift_single function
def _compute_total_uplift_bootstrap(er_in,
                                    ref_cols,
                                    test_cols,
                                    wd_cols,
                                    ws_cols,
                                    wd_step = 2.0,
                                    wd_min = 0.0,
                                    wd_max = 360.0,
                                    ws_step = 1.0,
                                    ws_min = 0.0,
                                    ws_max = 50.0,
                                    bin_cols_in = None,
                                    weight_by = 'min', #min, sum
                                    df_freq_pl = None,
                                    wd_bin_overlap_radius = 0.,
                                    uplift_pairs = None,
                                    uplift_names = None,
                                    N = 1,
                                    percentiles = None,
                                    remove_all_nulls = False,
                                    ):

    """
    Compute the total change in energy production between two sets of turbines with bootstrapping.

    Runs _compute_total_uplift_single N times. The first run is requested without
    resampling (perform_resample=False) and provides the central estimate; every
    other run uses a resampled energy table and contributes to the percentile bounds.

    Args:
        er_in (EnergyRatioInput): An EnergyRatioInput object containing the data to use in the calculation.
        ref_cols (list[str]): A list of columns to use as the reference turbines
        test_cols (list[str]): A list of columns to use as the test turbines
        wd_cols (list[str]): A list of columns to derive the wind directions from
        ws_cols (list[str]): A list of columns to derive the wind speeds from
        wd_step (float): The width of the wind direction bins.
        wd_min (float): The minimum wind direction to use.
        wd_max (float): The maximum wind direction to use.
        ws_step (float): The width of the wind speed bins.
        ws_min (float): The minimum wind speed to use.
        ws_max (float): The maximum wind speed to use.
        bin_cols_in (list[str]): A list of column names to use for the wind speed and wind direction bins.
            If None, defaults to ['wd_bin', 'ws_bin'].
        weight_by (str): How to weight the energy ratio, options are 'min', or 'sum'. 'min' means
            the minimum count across the dataframes is used to weight the energy ratio. 'sum' means the sum of the counts
            across the dataframes is used to weight the energy ratio.
        df_freq_pl (pl.Dataframe) Polars dataframe of pre-provided per bin weights
        wd_bin_overlap_radius (float): The distance in degrees one wd bin overlaps into the next, must be
            less or equal to half the value of wd_step
        uplift_pairs: (list[tuple]): List of pairs of df_names to compute uplifts for. Each element
            of the list should be a tuple (or list) of length 2, where the first element will be the
            base case in the uplift calculation and the second element will be the test case in the
            uplift calculation. If None, no uplifts are computed.
        uplift_names: (list[str]): Names for the uplift results, following the order of the
            pairs specified in uplift_pairs. If None, no results are reported by this function.
        N (int): The number of bootstrap samples to use. Must be at least 1.
        percentiles: (list or None): Two-element list of the lower and upper percentiles to use
            when returning the uplift bounds. If None, defaults to [5, 95].
        remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be
            available. If False, a minimum one data point from ref_cols, test_cols, wd_cols, and ws_cols
            must be available to compute the bin. Defaults to False.

    Returns:
        dict: A dictionary indexed by each element of uplift_names. Each value is the tuple
            (delta_aep_central, delta_aep_lb, delta_aep_ub,
            percent_delta_aep_central, percent_delta_aep_lb, percent_delta_aep_ub).
        pl.DataFrame: The df_freq_pl returned by the first (non-resampled) run.

    Raises:
        ValueError: If N is smaller than 1.
    """

    # None sentinels replace the former mutable default arguments; a fresh
    # object is built on every call so nothing is shared between calls.
    if bin_cols_in is None:
        bin_cols_in = ['wd_bin', 'ws_bin']
    if uplift_pairs is None:
        uplift_pairs = []
    if uplift_names is None:
        uplift_names = []
    if percentiles is None:
        percentiles = [5., 95.]

    # Without at least one run there is no central estimate to report.
    if N < 1:
        raise ValueError("N must be at least 1 to compute the total uplift.")

    # Run the single computation N times; i == 0 is the run without resampling.
    uplift_single_outs = [
        _compute_total_uplift_single(
            er_in.resample_energy_table(perform_resample=(i != 0)),
            er_in.df_names,
            ref_cols,
            test_cols,
            wd_cols,
            ws_cols,
            wd_step,
            wd_min,
            wd_max,
            ws_step,
            ws_min,
            ws_max,
            bin_cols_in,
            weight_by,
            df_freq_pl,
            wd_bin_overlap_radius,
            uplift_pairs,
            uplift_names,
            remove_all_nulls
        ) for i in range(N)
    ]

    # First output contains the original table; use that df_freq_pl
    df_freq_pl = uplift_single_outs[0][1]

    # Convert the requested percentiles to the [0, 1] levels np.quantile expects.
    lower_level = percentiles[0] / 100
    upper_level = percentiles[1] / 100

    # Add in the statistics
    total_uplift_result = {}

    for uplift_name in uplift_names:
        # Each single result stores (delta_aep, percent_delta_aep) per uplift name.
        delta_aeps = np.array(
            [single_out[0][uplift_name][0] for single_out in uplift_single_outs],
            dtype=float,
        )
        percent_delta_aeps = np.array(
            [single_out[0][uplift_name][1] for single_out in uplift_single_outs],
            dtype=float,
        )

        total_uplift_result[uplift_name] = (
            delta_aeps[0],  # central value comes from the non-resampled run
            np.quantile(delta_aeps, lower_level),
            np.quantile(delta_aeps, upper_level),
            percent_delta_aeps[0],
            np.quantile(percent_delta_aeps, lower_level),
            np.quantile(percent_delta_aeps, upper_level),
        )

    return total_uplift_result, df_freq_pl


def compute_total_uplift(er_in: EnergyRatioInput,
ref_turbines = None,
test_turbines = None,
Expand Down Expand Up @@ -318,64 +436,34 @@ def compute_total_uplift(er_in: EnergyRatioInput,
remove_all_nulls
)
else:
raise NotImplementedError(
"Bootstrapping not yet implemented for total power uplift calculation."+\
"Please set N = 1."
if percentiles is None:
percentiles = [5, 95]
elif not hasattr(percentiles, "__len__") or len(percentiles) != 2:
raise ValueError("percentiles should be a two element list of the "+\
"upper and lower desired percentiles.")

total_uplift_result, df_freq_pl = _compute_total_uplift_bootstrap(
er_in,
ref_cols,
test_cols,
wd_cols,
ws_cols,
wd_step,
wd_min,
wd_max,
ws_step,
ws_min,
ws_max,
bin_cols_in,
weight_by,
df_freq_pl,
wd_bin_overlap_radius,
uplift_pairs,
uplift_names,
N,
percentiles
)
# if percentiles is None:
# percentiles = [5, 95]
# elif not hasattr(percentiles, "__len__") or len(percentiles) != 2:
# raise ValueError("percentiles should be a two element list of the "+\
# "upper and lower desired percentiles.")

# df_res, df_freq_pl, total_uplift_result = _compute_energy_ratio_bootstrap(
# er_in,
# ref_cols,
# test_cols,
# wd_cols,
# ws_cols,
# wd_step,
# wd_min,
# wd_max,
# ws_step,
# ws_min,
# ws_max,
# bin_cols_in,
# weight_by,
# df_freq_pl,
# wd_bin_overlap_radius,
# uplift_pairs,
# uplift_names,
# compute_total_uplift,
# N,
# percentiles
# )

# Return the df_freqs, handle as needed.

# Sort df_res by df_names, ws, wd

# # Return the results as an EnergyRatioOutput object
# return EnergyRatioOutput(df_res.to_pandas(),
# er_in,
# df_freq_pl.to_pandas(),
# ref_cols,
# test_cols,
# wd_cols,
# ws_cols,
# uplift_names,
# total_uplift_result,
# wd_step,
# wd_min,
# wd_max,
# ws_step,
# ws_min,
# ws_max,
# bin_cols_in,
# weight_by,
# wd_bin_overlap_radius,
# N)


# Do we want some kind of more complex return object? Or are we OK
# returning just the total_uplift_result dictionary?
return total_uplift_result
39 changes: 38 additions & 1 deletion tests/total_uplift_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_total_uplift(self):
'pow_001': [2., 2., 1., 1.,30.],
})

er_in = EnergyRatioInput([df_base, df_wake_steering],['baseline', 'wake_steering'], num_blocks=1)
er_in = EnergyRatioInput([df_base, df_wake_steering],['baseline', 'wake_steering'])

total_uplift_result = tup.compute_total_uplift(
er_in,
Expand Down Expand Up @@ -76,3 +76,40 @@ def test_total_uplift(self):
delta_aep, percent_delta_aep = total_uplift_result['uplift']
self.assertAlmostEqual(delta_aep, 18615 , places=4)
self.assertAlmostEqual(percent_delta_aep, 47.22222222 , places=4)


def test_total_uplift_bootstrap(self):
    """Check that bootstrapped total uplift returns central values and bounds."""

    # Test the ability to compute the total uplift in energy production with bootstrapping

    # This time use ratios that are all 1 in the baseline case and between 1.5 and 2.5
    df_base = pd.DataFrame({'wd': [270, 270., 270.,270.,270.],
                            'ws': [7., 8., 8.,8.,8.],
                            'pow_000': [1., 1., 1., 1.,1.],
                            'pow_001': [1., 1., 1., 1.,1.],
                            })

    df_wake_steering = pd.DataFrame({'wd': [270, 270., 270.,270.,270.],
                                     'ws': [7., 7., 8.,8.,8.],
                                     'pow_000': [1., 1., 1., 1.,1.],
                                     'pow_001': [1.5, 1.7, 2., 2.25,2.5],
                                     })

    # One block per row, so each bootstrap sample is drawn from individual rows
    er_in = EnergyRatioInput([df_base, df_wake_steering],['baseline', 'wake_steering'], num_blocks=df_base.shape[0])

    total_uplift_result = tup.compute_total_uplift(
        er_in,
        ref_turbines=[0],
        test_turbines=[1],
        use_predefined_wd=True,
        use_predefined_ws=True,
        wd_min = 269.,
        wd_step=2.0,
        ws_min = 0.5, # Make sure bin labels land on whole numbers
        weight_by='min',
        uplift_pairs = ['baseline', 'wake_steering'],
        uplift_names = ['uplift'],
        N=10
    )

    # Bootstrapped samples are random, so only the structure of the result
    # is asserted: one entry per uplift name holding
    # (central, lower, upper) for delta AEP and for percent delta AEP.
    self.assertIn('uplift', total_uplift_result)
    uplift_result = total_uplift_result['uplift']
    self.assertEqual(len(uplift_result), 6)

    # Lower percentile bound can never exceed the upper percentile bound
    self.assertLessEqual(uplift_result[1], uplift_result[2])
    self.assertLessEqual(uplift_result[4], uplift_result[5])

0 comments on commit 1d6c8b9

Please sign in to comment.