diff --git a/flasc/energy_ratio/energy_ratio.py b/flasc/energy_ratio/energy_ratio.py index d12db9c8..e20dd1b6 100644 --- a/flasc/energy_ratio/energy_ratio.py +++ b/flasc/energy_ratio/energy_ratio.py @@ -89,7 +89,7 @@ def _compute_energy_ratio_single(df_, if len(df_) == 0: raise RuntimeError("After removing nulls, no data remains for computation.") - # Apply binning to dataframe + # Apply binning to dataframe and group by bin df_ = util.bin_and_group_dataframe(df_, ref_cols, test_cols, wd_cols, ws_cols, wd_step, wd_min, wd_max, ws_step, ws_min, ws_max, wd_bin_overlap_radius, remove_all_nulls, bin_cols_without_df_name, num_df) diff --git a/flasc/energy_ratio/total_uplift.py b/flasc/energy_ratio/total_uplift.py index d7d32473..099fd385 100644 --- a/flasc/energy_ratio/total_uplift.py +++ b/flasc/energy_ratio/total_uplift.py @@ -41,7 +41,7 @@ def _compute_total_uplift_single( ): """ - Compute the energy ratio between two sets of turbines. + Compute the total change in energy production between two sets of turbines. Args: df_ (pl.DataFrame): A dataframe containing the data to use in the calculation. @@ -74,7 +74,7 @@ def _compute_total_uplift_single( must be available to compute the bin. Defaults to False. Returns: - pl.DataFrame: A dataframe containing the energy ratio for each wind direction bin + dict: A dictionary with results indexed for each element of uplift_names pl.DataFrame: A dataframe containing the weights each wind direction and wind speed bin """ @@ -129,6 +129,124 @@ def _compute_total_uplift_single( return total_uplift_result, df_freq_pl + +# Bootstrap function wraps the _compute_energy_ratio function +def _compute_total_uplift_bootstrap(er_in, + ref_cols, + test_cols, + wd_cols, + ws_cols, + wd_step = 2.0, + wd_min = 0.0, + wd_max = 360.0, + ws_step = 1.0, + ws_min = 0.0, + ws_max = 50.0, + bin_cols_in = ['wd_bin','ws_bin'], + weight_by = 'min', #min, sum + df_freq_pl = None, + wd_bin_overlap_radius = 0., + uplift_pairs = [], + uplift_names = [], + N = 1, + percentiles=[5., 95.], + remove_all_nulls=False, + ): + + """ + Compute the total change in energy production between two sets of turbines with bootstrapping + + Args: + er_in (EnergyRatioInput): An EnergyRatioInput object containing the data to use in the calculation. + ref_cols (list[str]): A list of columns to use as the reference turbines + test_cols (list[str]): A list of columns to use as the test turbines + wd_cols (list[str]): A list of columns to derive the wind directions from + ws_cols (list[str]): A list of columns to derive the wind speeds from + wd_step (float): The width of the wind direction bins. + wd_min (float): The minimum wind direction to use. + wd_max (float): The maximum wind direction to use. + ws_step (float): The width of the wind speed bins. + ws_min (float): The minimum wind speed to use. + ws_max (float): The maximum wind speed to use. + bin_cols_in (list[str]): A list of column names to use for the wind speed and wind direction bins. + weight_by (str): How to weight the energy ratio, options are 'min', or 'sum'. 'min' means + the minimum count across the dataframes is used to weight the energy ratio. 'sum' means the sum of the counts + across the dataframes is used to weight the energy ratio. + df_freq_pl (pl.Dataframe) Polars dataframe of pre-provided per bin weights + wd_bin_overlap_radius (float): The distance in degrees one wd bin overlaps into the next, must be + less or equal to half the value of wd_step + uplift_pairs: (list[tuple]): List of pairs of df_names to compute uplifts for. Each element + of the list should be a tuple (or list) of length 2, where the first element will be the + base case in the uplift calculation and the second element will be the test case in the + uplift calculation. If None, no uplifts are computed. + uplift_names: (list[str]): Names for the uplift columns, following the order of the + pairs specified in uplift_pairs. If None, will default to "uplift_df_name1_df_name2" + N (int): The number of bootstrap samples to use. + percentiles: (list or None): percentiles to use when returning energy ratio bounds. + If specified as None with N > 1 (bootstrapping), defaults to [5, 95]. + remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be + available. If False, a minimum one data point from ref_cols, test_cols, wd_cols, and ws_cols + must be available to compute the bin. Defaults to False. + + + Returns: + pl.DataFrame: A dataframe containing the energy ratio between the two sets of turbines. + + """ + + # Otherwise run the function N times and concatenate the results to compute statistics + uplift_single_outs = [ + _compute_total_uplift_single( + er_in.resample_energy_table(perform_resample=(i != 0)), + er_in.df_names, + ref_cols, + test_cols, + wd_cols, + ws_cols, + wd_step, + wd_min, + wd_max, + ws_step, + ws_min, + ws_max, + bin_cols_in, + weight_by, + df_freq_pl, + wd_bin_overlap_radius, + uplift_pairs, + uplift_names, + remove_all_nulls + ) for i in range(N) + ] + # df_concat = pl.concat([uplift_single_out[0] for uplift_single_out in uplift_single_outs]) + # First output contains the original table; use that df_freq_pl + df_freq_pl = uplift_single_outs[0][1] + + + # Add in the statistics + total_uplift_result = {} + + for uplift_name in uplift_names: + delta_aeps = np.zeros(N) + percent_delta_aeps = np.zeros(N) + + for i in range(N): + delta_aeps[i] = uplift_single_outs[i][0][uplift_name][0] + percent_delta_aeps[i] = uplift_single_outs[i][0][uplift_name][1] + + delta_aep_central = delta_aeps[0] + delta_aep_lb = np.quantile(delta_aeps, percentiles[0]/100) + delta_aep_ub = np.quantile(delta_aeps, percentiles[1]/100) + + percent_delta_aep_central = percent_delta_aeps[0] + percent_delta_aep_lb = np.quantile(percent_delta_aeps, percentiles[0]/100) + percent_delta_aep_ub = np.quantile(percent_delta_aeps, percentiles[1]/100) + + total_uplift_result[uplift_name] = (delta_aep_central, delta_aep_lb, delta_aep_ub, percent_delta_aep_central, percent_delta_aep_lb, percent_delta_aep_ub) + + return total_uplift_result, df_freq_pl + + def compute_total_uplift(er_in: EnergyRatioInput, ref_turbines = None, test_turbines = None, @@ -318,64 +436,34 @@ def compute_total_uplift(er_in: EnergyRatioInput, remove_all_nulls ) else: - raise NotImplementedError( - "Bootstrapping not yet implemented for total power uplift calculation."+\ - "Please set N = 1." + if percentiles is None: + percentiles = [5, 95] + elif not hasattr(percentiles, "__len__") or len(percentiles) != 2: + raise ValueError("percentiles should be a two element list of the "+\ + "upper and lower desired percentiles.") + + total_uplift_result, df_freq_pl = _compute_total_uplift_bootstrap( + er_in, + ref_cols, + test_cols, + wd_cols, + ws_cols, + wd_step, + wd_min, + wd_max, + ws_step, + ws_min, + ws_max, + bin_cols_in, + weight_by, + df_freq_pl, + wd_bin_overlap_radius, + uplift_pairs, + uplift_names, + N, + percentiles ) - # if percentiles is None: - # percentiles = [5, 95] - # elif not hasattr(percentiles, "__len__") or len(percentiles) != 2: - # raise ValueError("percentiles should be a two element list of the "+\ - # "upper and lower desired percentiles.") - - # df_res, df_freq_pl, total_uplift_result = _compute_energy_ratio_bootstrap( - # er_in, - # ref_cols, - # test_cols, - # wd_cols, - # ws_cols, - # wd_step, - # wd_min, - # wd_max, - # ws_step, - # ws_min, - # ws_max, - # bin_cols_in, - # weight_by, - # df_freq_pl, - # wd_bin_overlap_radius, - # uplift_pairs, - # uplift_names, - # compute_total_uplift, - # N, - # percentiles - # ) - - # Return the df_freqs, handle as needed. - - # Sort df_res by df_names, ws, wd - - # # Return the results as an EnergyRatioOutput object - # return EnergyRatioOutput(df_res.to_pandas(), - # er_in, - # df_freq_pl.to_pandas(), - # ref_cols, - # test_cols, - # wd_cols, - # ws_cols, - # uplift_names, - # total_uplift_result, - # wd_step, - # wd_min, - # wd_max, - # ws_step, - # ws_min, - # ws_max, - # bin_cols_in, - # weight_by, - # wd_bin_overlap_radius, - # N) - + # Do we want some kind of more complex return object? Or are we OK # returning just the total_uplift_result dictionary? return total_uplift_result \ No newline at end of file diff --git a/tests/total_uplift_test.py b/tests/total_uplift_test.py index faba8c0d..2e43a926 100644 --- a/tests/total_uplift_test.py +++ b/tests/total_uplift_test.py @@ -25,7 +25,7 @@ def test_total_uplift(self): 'pow_001': [2., 2., 1., 1.,30.], }) - er_in = EnergyRatioInput([df_base, df_wake_steering],['baseline', 'wake_steering'], num_blocks=1) + er_in = EnergyRatioInput([df_base, df_wake_steering],['baseline', 'wake_steering']) total_uplift_result = tup.compute_total_uplift( er_in, @@ -76,3 +76,40 @@ def test_total_uplift(self): delta_aep, percent_delta_aep = total_uplift_result['uplift'] self.assertAlmostEqual(delta_aep, 18615 , places=4) self.assertAlmostEqual(percent_delta_aep, 47.22222222 , places=4) + + + def test_total_uplift_bootstrap(self): + + # Test the ability to compute the total uplift in energy production with bootstrapping + + # This time use ratios that are all 1 in the baseline case and between 1.5 and 2.5 + df_base = pd.DataFrame({'wd': [270, 270., 270.,270.,270.], + 'ws': [7., 8., 8.,8.,8.], + 'pow_000': [1., 1., 1., 1.,1.], + 'pow_001': [1., 1., 1., 1.,1.], + }) + + df_wake_steering = pd.DataFrame({'wd': [270, 270., 270.,270.,270.], + 'ws': [7., 7., 8.,8.,8.], + 'pow_000': [1., 1., 1., 1.,1.], + 'pow_001': [1.5, 1.7, 2., 2.25,2.5], + }) + + er_in = EnergyRatioInput([df_base, df_wake_steering],['baseline', 'wake_steering'], num_blocks=df_base.shape[0]) + + total_uplift_result = tup.compute_total_uplift( + er_in, + ref_turbines=[0], + test_turbines=[1], + use_predefined_wd=True, + use_predefined_ws=True, + wd_min = 269., + wd_step=2.0, + ws_min = 0.5, # Make sure bin labels land on whole numbers + weight_by='min', + uplift_pairs = ['baseline', 'wake_steering'], + uplift_names = ['uplift'], + N=10 + ) + + print(total_uplift_result) \ No newline at end of file