NREL · paulf81 · Sep 8, 2023 · Sep 6, 2023 · Sep 6, 2023 · Sep 6, 2023
diff --git a/flasc/energy_ratio/energy_ratio.py b/flasc/energy_ratio/energy_ratio.py
@@ -12,7 +12,17 @@
 
 from flasc.energy_ratio.energy_ratio_output import EnergyRatioOutput
 from flasc.energy_ratio.energy_ratio_input import EnergyRatioInput
-from flasc.energy_ratio.energy_ratio_utilities import add_ws_bin, add_wd, add_wd_bin, add_power_ref, add_power_test, add_reflected_rows
+from flasc.energy_ratio.energy_ratio_utilities import (
+    add_ws_bin,
+    add_wd,
+    add_wd_bin,
+    add_power_ref,
+    add_power_test,
+    add_reflected_rows,
+    check_compute_energy_ratio_inputs,
+    filter_all_nulls,
+    filter_any_nulls
+)
 
 
 # Internal version, returns a polars dataframe
@@ -31,7 +41,8 @@ def _compute_energy_ratio_single(df_,
                          bin_cols_in = ['wd_bin','ws_bin'],
                          wd_bin_overlap_radius = 0.,
                          uplift_pairs = [],
-                         uplift_names = []
+                         uplift_names = [],
+                         remove_all_nulls = False
                          ):
 
     """
@@ -58,7 +69,10 @@ def _compute_energy_ratio_single(df_,
             base case in the uplift calculation and the second element will be the test case in the 
             uplift calculation. If None, no uplifts are computed.
         uplift_names: (list[str]): Names for the uplift columns, following the order of the 
-            pairs specified in uplift_pairs. If None, will default to "uplift_df_name1_df_name2"
+            pairs specified in uplift_pairs. If None, will default to "uplift_df_name1_df_name2",
+        remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be 
+            available. If False, a minimum one data point from ref_cols, test_cols, wd_cols, and ws_cols
+            must be available to compute the bin. Defaults to False.
 
     Returns:
         pl.DataFrame: A dataframe containing the energy ratio for each wind direction bin
@@ -67,23 +81,25 @@ def _compute_energy_ratio_single(df_,
     # Identify the number of dataframes
     num_df = len(df_names)
 
-    # Filter df_ that all the columns are not null
-    print(ref_cols + test_cols + ws_cols + wd_cols)
-    df_ = df_.filter(pl.all_horizontal(pl.col(ref_cols + test_cols + ws_cols + wd_cols).is_not_null()))
+    # Filter df_ to remove null values
+    null_filter = filter_all_nulls if remove_all_nulls else filter_any_nulls
+    df_ = null_filter(df_, ref_cols, test_cols, ws_cols, wd_cols)
+    if len(df_) == 0:
+        raise RuntimeError("After removing nulls, no data remains for computation.")
 
     # If wd_bin_overlap_radius is not zero, add reflected rows
     if wd_bin_overlap_radius > 0.:
 
         # Need to obtain the wd column now rather than during binning
-        df_ = add_wd(df_, wd_cols)
+        df_ = add_wd(df_, wd_cols, remove_all_nulls)
 
         # Add reflected rows
         edges = np.arange(wd_min, wd_max + wd_step, wd_step)
         df_ = add_reflected_rows(df_, edges, wd_bin_overlap_radius)
 
     # Assign the wd/ws bins
-    df_ = add_ws_bin(df_, ws_cols, ws_step, ws_min, ws_max)
-    df_ = add_wd_bin(df_, wd_cols, wd_step, wd_min, wd_max)
+    df_ = add_ws_bin(df_, ws_cols, ws_step, ws_min, ws_max, remove_all_nulls=remove_all_nulls)
+    df_ = add_wd_bin(df_, wd_cols, wd_step, wd_min, wd_max, remove_all_nulls=remove_all_nulls)
 
 
 
@@ -150,7 +166,8 @@ def _compute_energy_ratio_bootstrap(er_in,
                          uplift_pairs = [],
                          uplift_names = [],
                          N = 1,
-                         percentiles=[5., 95.]
+                         percentiles=[5., 95.],
+                         remove_all_nulls=False,
                          ):
 
     """
@@ -178,6 +195,12 @@ def _compute_energy_ratio_bootstrap(er_in,
         uplift_names: (list[str]): Names for the uplift columns, following the order of the 
             pairs specified in uplift_pairs. If None, will default to "uplift_df_name1_df_name2"
         N (int): The number of bootstrap samples to use.
+        percentiles: (list or None): percentiles to use when returning energy ratio bounds. 
+            If specified as None with N > 1 (bootstrapping), defaults to [5, 95].
+        remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be 
+                available. If False, a minimum one data point from ref_cols, test_cols, wd_cols, and ws_cols
+                must be available to compute the bin. Defaults to False.
+
 
     Returns:
         pl.DataFrame: A dataframe containing the energy ratio between the two sets of turbines.
@@ -201,7 +224,8 @@ def _compute_energy_ratio_bootstrap(er_in,
                         bin_cols_in,
                         wd_bin_overlap_radius,
                         uplift_pairs,
-                        uplift_names
+                        uplift_names,
+                        remove_all_nulls
                         ) for i in range(N)])
 
     bound_names = er_in.df_names + uplift_names
@@ -235,7 +259,8 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
                          uplift_pairs = None,
                          uplift_names = None,
                          N = 1,
-                         percentiles = None
+                         percentiles = None,
+                         remove_all_nulls = False
                          )-> EnergyRatioOutput:
 
     """
@@ -268,6 +293,9 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
         N (int): The number of bootstrap samples to use.
         percentiles: (list or None): percentiles to use when returning energy ratio bounds. 
             If specified as None with N > 1 (bootstrapping), defaults to [5, 95].
+        remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be 
+                available. If False, a minimum one data point from ref_cols, test_cols, wd_cols, and ws_cols
+                must be available to compute the bin. Defaults to False.
 
     Returns:
         EnergyRatioOutput: An EnergyRatioOutput object containing the energy ratio between the two sets of turbines.
@@ -277,55 +305,30 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
     # Get the polars dataframe from within the er_in
     df_ = er_in.get_df()
 
-    # Check that the inputs are valid
-    # If use_predefined_ref is True, df_ must have a column named 'pow_ref'
-    if use_predefined_ref:
-        if 'pow_ref' not in df_.columns:
-            raise ValueError('df_ must have a column named pow_ref when use_predefined_ref is True')
-        # If ref_turbines supplied, warn user that it will be ignored
-        if ref_turbines is not None:
-            warnings.warn('ref_turbines will be ignored when use_predefined_ref is True')
-    else:
-        # ref_turbine must be supplied
-        if ref_turbines is None:
-            raise ValueError('ref_turbines must be supplied when use_predefined_ref is False')
-
-    # If use_predefined_ws is True, df_ must have a column named 'ws'
-    if use_predefined_ws:
-        if 'ws' not in df_.columns:
-            raise ValueError('df_ must have a column named ws when use_predefined_ws is True')
-        # If ws_turbines supplied, warn user that it will be ignored
-        if ws_turbines is not None:
-            warnings.warn('ws_turbines will be ignored when use_predefined_ws is True')
-    else:
-        # ws_turbine must be supplied
-        if ws_turbines is None:
-            raise ValueError('ws_turbines must be supplied when use_predefined_ws is False')
-
-    # If use_predefined_wd is True, df_ must have a column named 'wd'
-    if use_predefined_wd:
-        if 'wd' not in df_.columns:
-            raise ValueError('df_ must have a column named wd when use_predefined_wd is True')
-        # If wd_turbines supplied, warn user that it will be ignored
-        if wd_turbines is not None:
-            warnings.warn('wd_turbines will be ignored when use_predefined_wd is True')
-    else:
-        # wd_turbine must be supplied
-        if wd_turbines is None:
-            raise ValueError('wd_turbines must be supplied when use_predefined_wd is False')
-
-
-    # Confirm that test_turbines is a list of ints or a numpy array of ints
-    if not isinstance(test_turbines, list) and not isinstance(test_turbines, np.ndarray):
-        raise ValueError('test_turbines must be a list or numpy array of ints')
-
-    # Confirm that test_turbines is not empty  
-    if len(test_turbines) == 0:
-        raise ValueError('test_turbines cannot be empty')
-
-    # Confirm that wd_bin_overlap_radius is less than or equal to wd_step/2
-    if wd_bin_overlap_radius > wd_step/2:
-        raise ValueError('wd_bin_overlap_radius must be less than or equal to wd_step/2')
+    # Check that inputs are valid
+    check_compute_energy_ratio_inputs(
+        df_,
+        ref_turbines,
+        test_turbines,
+        wd_turbines,
+        ws_turbines,
+        use_predefined_ref,
+        use_predefined_wd,
+        use_predefined_ws,
+        wd_step,
+        wd_min,
+        wd_max,
+        ws_step,
+        ws_min,
+        ws_max,
+        bin_cols_in,
+        wd_bin_overlap_radius,
+        uplift_pairs,
+        uplift_names,
+        N,
+        percentiles,
+        remove_all_nulls
+    )
 
      # Set up the column names for the reference and test power
     if not use_predefined_ref:
@@ -382,7 +385,8 @@ def compute_energy_ratio(er_in: EnergyRatioInput,
                         bin_cols_in,
                         wd_bin_overlap_radius,
                         uplift_pairs,
-                        uplift_names
+                        uplift_names,
+                        remove_all_nulls
                     )
     else:
         if percentiles is None:

diff --git a/flasc/energy_ratio/energy_ratio_input.py b/flasc/energy_ratio/energy_ratio_input.py
@@ -4,7 +4,6 @@
 
 from typing import Optional, Dict, List, Any, Tuple, Union
 
-from flasc.energy_ratio.energy_ratio_utilities import add_ws_bin, add_wd_bin
 from flasc.dataframe_operations.dataframe_manipulations import df_reduce_precision
 
 

diff --git a/flasc/energy_ratio/energy_ratio_output.py b/flasc/energy_ratio/energy_ratio_output.py
@@ -10,7 +10,12 @@
 
 from flasc.energy_ratio.energy_ratio_input import EnergyRatioInput
 
-from flasc.energy_ratio.energy_ratio_utilities import add_ws_bin, add_wd_bin
+from flasc.energy_ratio.energy_ratio_utilities import (
+    add_ws_bin,
+    add_wd_bin,
+    filter_all_nulls,
+    filter_any_nulls
+)
 
 
 class EnergyRatioOutput:
@@ -33,7 +38,8 @@ def __init__(self,
                  ws_max: float,
                  bin_cols_in: List[str],
                  wd_bin_overlap_radius: float,
-                 N: int
+                 N: int,
+                 remove_all_nulls: bool = False
                 ) -> None:
         """Initialize an EnergyRatioOutput object.
 
@@ -54,6 +60,9 @@ def __init__(self,
             bin_cols_in (List[str]): TBD
             wd_bin_overlap_radius (float): The radius of overlap between wind direction bins.
             N (int): The number of bootstrap iterations used in the energy ratio calculation.
+            remove_all_nulls: (bool): Construct reference and test by strictly requiring all data to be 
+                available. If False, a minimum one data point from ref_cols, test_cols, wd_cols, and ws_cols
+                must be available to compute the bin. Defaults to False.
         """
         self.df_result = df_result
         self.df_names = er_in.df_names
@@ -73,6 +82,7 @@ def __init__(self,
         self.bin_cols_in = bin_cols_in
         self.wd_bin_overlap_radius = wd_bin_overlap_radius
         self.N = N
+        self.remove_all_nulls = remove_all_nulls
 
     def _compute_df_freq(self):
         """ Compute the of ws/wd as previously computed but not presently
@@ -83,12 +93,15 @@ def _compute_df_freq(self):
         # Temporary copy of energy table
         df_ = self.er_in.get_df()
 
-        # Filter df_ that all the columns are not null
-        df_ = df_.filter(pl.all_horizontal(pl.col(self.ref_cols + self.test_cols + self.ws_cols + self.wd_cols).is_not_null()))
+        # Filter df_ to remove null values
+        null_filter = filter_all_nulls if self.remove_all_nulls else filter_any_nulls
+        df_ = null_filter(df_, self.ref_cols, self.test_cols, self.ws_cols, self.wd_cols)
 
         # Assign the wd/ws bins
-        df_ = add_ws_bin(df_, self.ws_cols, self.ws_step, self.ws_min, self.ws_max)
-        df_ = add_wd_bin(df_, self.wd_cols, self.wd_step, self.wd_min, self.wd_max)
+        df_ = add_ws_bin(df_, self.ws_cols, self.ws_step, self.ws_min, self.ws_max,
+            remove_all_bins=self.remove_all_nulls)
+        df_ = add_wd_bin(df_, self.wd_cols, self.wd_step, self.wd_min, self.wd_max,
+            remove_all_bins=self.remove_all_nulls)
 
         # Get the bin count by wd, ws and df_name
         df_group = df_.groupby(['wd_bin','ws_bin','df_name']).count()
Original file line number	Diff line number	Diff line change
Expand Up		@@ -4,7 +4,6 @@

		from typing import Optional, Dict, List, Any, Tuple, Union

		from flasc.energy_ratio.energy_ratio_utilities import add_ws_bin, add_wd_bin
		from flasc.dataframe_operations.dataframe_manipulations import df_reduce_precision


Expand Down