Merge in changes from develop; address conflicts; rename test to avoid call by pytest.
paulf81 · Oct 13, 2023 · 335f753 · 335f753
2 parents 32bd8a3 + 34dcddb
commit 335f753
Show file tree

Hide file tree

Showing 8 changed files with 637 additions and 214 deletions.
diff --git a/examples_smarteole/05_baseline_energy_ratio_analysis.ipynb b/examples_smarteole/05_baseline_energy_ratio_analysis.ipynb
diff --git a/examples_smarteole/06_wake_steering_energy_ratio_analysis.ipynb b/examples_smarteole/06_wake_steering_energy_ratio_analysis.ipynb
diff --git a/examples_smarteole/test_script.py → examples_smarteole/0X_test_script.py b/examples_smarteole/test_script.py → examples_smarteole/0X_test_script.py
diff --git a/flasc/energy_ratio/energy_ratio.py b/flasc/energy_ratio/energy_ratio.py
diff --git a/flasc/energy_ratio/energy_ratio_input.py b/flasc/energy_ratio/energy_ratio_input.py
@@ -1,3 +1,4 @@
+from xmlrpc.client import Boolean
 import numpy as np
 import pandas as pd
 import polars as pl
@@ -91,27 +92,20 @@ def get_df(self) -> pl.DataFrame:
         return self.df_pl.clone()
 
     def resample_energy_table(self, 
-                              i: int,
+                              perform_resample: bool=True
                               )-> pl.DataFrame:
         """Use the block column of an energy table to resample the data.
 
         Args:
             df_e_ (pl.DataFrame): An energy table with a block column
-            i (int): An index code where 0 returns the original energy table and
-                 and other values return a resampled energy table
+            perform_resample (bool): If False, returns the original energy table without resampling. Defaults to True.
 
         Returns:
             pl.DataFrame: A new energy table with (approximately)
                 the same number of rows as the original
         """
 
-        # If i is 0, return the original dataframe without resampling
-        if i == 0: 
-            return self.get_df()
-
-        # If i is not 0, resample the dataframe
-        else:
-
+        if perform_resample:
             # Generate a random np.array, num_blocks long, where each element is
             #  an integer between 0 and num_blocks-1
             block_list = np.random.randint(0, self.num_blocks, self.num_blocks)
@@ -120,4 +114,6 @@ def resample_energy_table(self,
                 {
                     'block':block_list
                 }
-                ).join(self.df_pl, how='inner', on='block')
+                ).join(self.df_pl, how='inner', on='block')
+        else: 
+            return self.get_df()
diff --git a/flasc/energy_ratio/energy_ratio_output.py b/flasc/energy_ratio/energy_ratio_output.py
@@ -25,6 +25,7 @@ class EnergyRatioOutput:
     def __init__(self,
                  df_result: pd.DataFrame,
                  er_in: EnergyRatioInput,
+                 df_freq: pd.DataFrame,
                  ref_cols: List[str],
                  test_cols: List[str],
                  wd_cols: List[str],
@@ -46,7 +47,8 @@ def __init__(self,
 
         Args:
             df_result (pd.DataFrame): The energy ratio results.
-            eri (EnergyRatioInput): The energy table used in the energy ratio calculation.
+            er_in (EnergyRatioInput): The energy table used in the energy ratio calculation.
+            df_freq (pd.DataFrame): Weights used for bins.
             ref_cols (List[str]): The column names of the reference turbines.
             test_cols (List[str]): The column names of the test wind turbines.
             wd_cols (List[str]): The column names of the wind directions.
@@ -69,6 +71,7 @@ def __init__(self,
                 must be available to compute the bin. Defaults to False.
         """
         self.df_result = df_result
+        self.df_freq = df_freq
         self.df_names = er_in.df_names
         self.num_df = len(self.df_names)
         self.er_in = er_in
@@ -89,34 +92,6 @@ def __init__(self,
         self.N = N
         self.remove_all_nulls = remove_all_nulls
 
-    def _compute_df_freq(self):
-        """ Compute the of ws/wd as previously computed but not presently
-        computed with the energy calculation. """
-
-        #TODO: I don't think so, but should this function count overlapping bins?
-
-        # Temporary copy of energy table
-        df_ = self.er_in.get_df()
-
-        # Filter df_ to remove null values
-        null_filter = filter_all_nulls if self.remove_all_nulls else filter_any_nulls
-        df_ = null_filter(df_, self.ref_cols, self.test_cols, self.ws_cols, self.wd_cols)
-
-        # Assign the wd/ws bins
-        df_ = add_ws_bin(df_, self.ws_cols, self.ws_step, self.ws_min, self.ws_max,
-            remove_all_nulls=self.remove_all_nulls)
-        df_ = add_wd_bin(df_, self.wd_cols, self.wd_step, self.wd_min, self.wd_max,
-            remove_all_nulls=self.remove_all_nulls)
-
-        # Get the bin count by wd, ws and df_name
-        df_group = df_.groupby(['wd_bin','ws_bin','df_name']).count()
-
-        # Collect the minimum number of points per bin
-        df_min = df_group.groupby(['wd_bin','ws_bin']).min()
-        df_sum = df_group.groupby(['wd_bin','ws_bin']).sum()
-
-        return df_.to_pandas(), df_group.to_pandas(), df_min.to_pandas(), df_sum.to_pandas()
-
     def plot_energy_ratios(self,
         df_names_subset: Optional[List[str]] = None,
         labels: Optional[List[str]] = None,
@@ -125,6 +100,7 @@ def plot_energy_ratios(self,
         polar_plot: bool = False,
         show_wind_direction_distribution: bool = True,
         show_wind_speed_distribution: bool | None = None,
+        overlay_frequency: bool = False,
         _is_uplift: bool = False
     ) -> Union[axes.Axes, List[axes.Axes]]:
         """Plot the energy ratios.
@@ -137,6 +113,7 @@ def plot_energy_ratios(self,
             polar_plot (bool, optional): Whether to plot the energy ratios on a polar plot. Defaults to False.
             show_wind_direction_distribution (bool, optional): Whether to show the wind direction distribution. Defaults to True.
             show_wind_speed_distribution (bool, optional): Whether to show the wind speed distribution. Defaults to True, unless polar_plot is True.
+            overlay_frequency (bool, optional): Whether to plot the frequency distribution used for calculation. Defaults to False.
             _is_uplift (bool, optional): Whether being called by plot_uplift(). Defaults to False.
 
         Returns:
@@ -192,13 +169,14 @@ def plot_energy_ratios(self,
         # If color_dict is None, use the default colors
         if color_dict is None:
             color_dict = {labels[i]: default_colors[i] for i in range(N)}
+            color_dict["weight"] = "black"
 
         # If color_dict is not a dictionary, raise an error
         if not isinstance(color_dict, dict):
             raise ValueError('color_dict must be a dictionary')
 
         # Make sure the keys of color_dict are in labels
-        if not all([label in labels for label in color_dict.keys()]):
+        if not all([label in labels+["weight"] for label in color_dict.keys()]):
             raise ValueError('color_dict keys must be in df_names_subset')
 
         if axarr is None:
@@ -317,26 +295,30 @@ def plot_energy_ratios(self,
         if polar_plot:
             bar_width = bar_width * np.pi / 180.0
 
-        # Plot the bin counts
-        _, df_freq, df_min, df_sum  = self._compute_df_freq()
-        df_freq_sum_all_ws = df_freq.groupby(["wd_bin","df_name"]).sum().reset_index()
-
         for i, (df_name, label) in enumerate(zip(df_names_subset, labels)):
             if _is_uplift: # Special case, use the minimum or the sum
-                if self.weight_by == 'min':
-                    df_sub = df_min
-                    ax.set_title('Minimum of Points per Bin')
-                else:
-                    df_sub = df_sum
-                    ax.set_title('Sum of Points per Bin')
+                ax.set_title('Minimum of Points per Bin' if self.weight_by == "min" else
+                    'Sum of Points per Bin')
             else:
-                df_sub = df_freq_sum_all_ws[df_freq_sum_all_ws["df_name"] == df_name]
                 ax.set_title('Number of Points per Bin')
 
-            x = np.array(df_sub["wd_bin"], dtype=float)
+            x = np.array(self.df_result["wd_bin"], dtype=float)
             if polar_plot: # Convert to radians
                 x = (90.0 - x) * np.pi / 180.0
-            axarr[1].bar(x - (i - N / 2) * bar_width, df_sub["count"], width=bar_width, label = label, color=color_dict[label])
+            ax.bar(
+                x - (i - N / 2) * bar_width,
+                self.df_result["count_"+df_name],
+                width=bar_width,
+                label=label,
+                color=color_dict[label]
+            )
+        if overlay_frequency:
+            if "weight" in color_dict:
+                col = color_dict["weight"]
+            else:
+                col = "black"
+            df_wd_weight = self.df_freq.drop(columns='ws_bin').groupby('wd_bin').sum().reset_index()
+            ax.plot(df_wd_weight['wd_bin'], df_wd_weight['weight'], color=col, label="Weight")
 
         ax.legend()
         ax.set_ylabel('Number of Points')
@@ -350,12 +332,10 @@ def plot_energy_ratios(self,
 
         ax = axarr[2]        
 
-        if self.weight_by == 'min':
-            sns.scatterplot(data=df_min, x='wd_bin', y='ws_bin', size='count',hue='count', ax=ax, legend=True, color='k')
-            ax.set_title('Minimum Number of Points per Bin')
-        else:
-            sns.scatterplot(data=df_sum, x='wd_bin', y='ws_bin', size='count',hue='count', ax=ax, legend=True, color='k')
-            ax.set_title('Sum of Points per Bin')
+        df_bin_counts = self._compute_ws_counts()
+        sns.scatterplot(data=df_bin_counts, x='wd_bin', y='ws_bin', size='count',hue='count', ax=ax, legend=True, color='k')
+        ax.set_title('Minimum Number of Points per Bin' if self.weight_by == "min" else 
+            'Sum of Points per Bin')
         ax.set_xlabel('Wind Direction (deg)')
         ax.set_ylabel('Wind Speed (m/s)')
 
@@ -371,7 +351,8 @@ def plot_uplift(self,
         axarr: Optional[Union[axes.Axes, List[axes.Axes]]] = None,
         polar_plot: bool = False,
         show_wind_direction_distribution: bool = True,
-        show_wind_speed_distribution: bool = True
+        show_wind_speed_distribution: bool = True,
+        overlay_frequency: bool = False,
     )-> Union[axes.Axes, List[axes.Axes]]:
         """Plot the uplift in energy ratio
 
@@ -383,6 +364,7 @@ def plot_uplift(self,
             polar_plot (bool, optional): Whether to plot the uplift on a polar plot. Defaults to False.
             show_wind_direction_distribution (bool, optional): Whether to show the wind direction distribution. Defaults to True.
             show_wind_speed_distribution (bool, optional): Whether to show the wind speed distribution. Defaults to True, unless polar_plot is True.
+            overlay_frequency (bool, optional): Whether to plot the frequency distribution used for calculation. Defaults to False.
 
         Raises:
             ValueError: If show_wind_speed_distribution is True and polar_plot is True.
@@ -441,13 +423,14 @@ def plot_uplift(self,
         # If color_dict is None, use the default colors
         if color_dict is None:
             color_dict = {labels[i]: default_colors[i] for i in range(N)}
+            color_dict["weight"] = "black"
 
         # If color_dict is not a dictionary, raise an error
         if not isinstance(color_dict, dict):
             raise ValueError('color_dict must be a dictionary')
 
         # Make sure the keys of color_dict are in labels
-        if not all([label in labels for label in color_dict.keys()]):
+        if not all([label in labels+["weight"] for label in color_dict.keys()]):
             raise ValueError('color_dict keys must be in df_names_subset')
 
 
@@ -472,6 +455,7 @@ def plot_uplift(self,
             polar_plot=polar_plot,
             show_wind_direction_distribution=show_wind_direction_distribution,
             show_wind_speed_distribution=show_wind_speed_distribution,
+            overlay_frequency=overlay_frequency,
             _is_uplift=True
         )
 
@@ -494,3 +478,29 @@ def plot_uplift(self,
         # ax.set_title("Minimum Number of Points per Bin")
 
         return axarr
+
+    def _compute_ws_counts(self):
+        """ Compute the number of points per (wd_bin, ws_bin) bin, as previously computed
+        but not presently computed with the energy calculation. """
+
+        # Temporary copy of energy table
+        df_ = self.er_in.get_df()
+
+        # Filter df_ to remove null values
+        null_filter = filter_all_nulls if self.remove_all_nulls else filter_any_nulls
+        df_ = null_filter(df_, self.ref_cols, self.test_cols, self.ws_cols, self.wd_cols)
+
+        # Assign the wd/ws bins
+        df_ = add_ws_bin(df_, self.ws_cols, self.ws_step, self.ws_min, self.ws_max,
+            remove_all_nulls=self.remove_all_nulls)
+        df_ = add_wd_bin(df_, self.wd_cols, self.wd_step, self.wd_min, self.wd_max,
+            remove_all_nulls=self.remove_all_nulls)
+
+        # Get the bin count by wd, ws and df_name
+        df_group = df_.groupby(['wd_bin','ws_bin','df_name']).count()
+
+        # Collect the minimum (weight_by == "min") or otherwise the sum of points per bin across df_names
+        df_return = df_group.groupby(['wd_bin','ws_bin']).min() if self.weight_by == "min" \
+            else df_group.groupby(['wd_bin','ws_bin']).sum()
+
+        return df_return.drop('df_name').to_pandas()
diff --git a/flasc/energy_ratio/energy_ratio_utilities.py b/flasc/energy_ratio/energy_ratio_utilities.py
@@ -5,7 +5,7 @@
 from typing import Union, List, Optional
 
 
-
+# TODO: Someday this can probably be replaced with polars-native code: https://github.com/pola-rs/polars/issues/8551
 def cut(col_name: str,
         edges: Union[np.ndarray, list],
     ) -> pl.Expr:
@@ -56,7 +56,7 @@ def bin_column(df_: pl.DataFrame,
         cut(
             col_name=col_name,
             edges = edges
-        ).alias(bin_col_name)
+        ).alias(bin_col_name).cast(df_[col_name].dtype)
     )
 
 def add_ws(df_: pl.DataFrame,
@@ -386,6 +386,7 @@ def check_compute_energy_ratio_inputs(
     ws_max,
     bin_cols_in,
     weight_by,
+    df_freq,
     wd_bin_overlap_radius,
     uplift_pairs,
     uplift_names,
@@ -452,5 +453,10 @@ def check_compute_energy_ratio_inputs(
     # Confirm the weight_by argument is valid
     if weight_by not in ['min', 'sum']:
         raise ValueError('weight_by must be one of "min", or "sum"')
+
+    # Confirm df_freq contains ws, wd and freq_val
+    if df_freq is not None:
+        if ('ws' not in df_freq.columns) or ('wd' not in df_freq.columns) or  ('freq_val' not in df_freq.columns):
+            raise ValueError('df_freq must have columns ws, wd and freq_val')
 
     return None