Streamline processing from bands

- Modified the frequency_bands_obj() method in decimation_level.py to remove the duplicated band_edges assignment. - Tried to remove all calls to the deprecated from_emtf_band_df() method. [Issue(s): #150]
simpeg · Sep 10, 2022 · 60344fb · 60344fb
1 parent 64cb52a
commit 60344fb
Show file tree

Hide file tree

Showing 4 changed files with 73 additions and 14 deletions.
diff --git a/aurora/config/metadata/decimation_level.py b/aurora/config/metadata/decimation_level.py
@@ -8,6 +8,7 @@
 # Imports
 # =============================================================================
 import numpy as np
+import pandas as pd
 
 from mt_metadata.base.helpers import write_lines
 from mt_metadata.base import get_schema, Base
@@ -117,18 +118,49 @@ def upper_bounds(self):
 
         return np.array(sorted([band.index_max for band in self.bands]))
 
-    def frequency_bands_obj(self):
+    @property
+    def bands_dataframe(self):
+        """
+        This is just a utility function that transforms a list of bands into a dataframe
+        ToDo: Consider making this a method of Bands()
+
+        Note: The decimation_level here is +1 to agree with EMTF convention.
+        Not clear this is really necessary
+
+        ToDo: Consider adding columns lower_edge, upper_edge to df
+
+        Parameters
+        ----------
+        None.  This property takes no arguments; the band list is read from self.bands
+            (i.e. aurora.config.metadata.decimation_level.DecimationLevel.bands)
+
+        Returns
+        -------
+        out_df: pd.DataFrame
+            Same format as that generated by EMTFBandSetupFile.get_decimation_level()
+        """
         from aurora.time_series.frequency_band_helpers import df_from_bands
+
+        bands_df = df_from_bands(self.bands)
+        return bands_df
+
+    def frequency_bands_obj(self):
+        """
+        Gets a FrequencyBands object that is used as input to processing.
+        This used to be needed because I only had
+
+        ToDo: Consider adding a .to_frequency_bands() method directly to self.bands
+        Returns
+        -------
+
+        """
         from aurora.time_series.frequency_band import FrequencyBands
 
-        emtf_band_df = df_from_bands(self.bands)
-        frequency_bands = FrequencyBands()
-        frequency_bands.from_emtf_band_df(
-            emtf_band_df,
-            self.decimation.level,
-            self.decimation.sample_rate,
-            self.window.num_samples,
-        )
+        bands_df = self.bands_dataframe
+        band_edges = np.vstack(
+            (bands_df.frequency_min.values, bands_df.frequency_max.values)
+        ).T
+        frequency_bands = FrequencyBands(band_edges=band_edges)
         return frequency_bands
 
     @property

diff --git a/aurora/sandbox/io_helpers/emtf_band_setup.py b/aurora/sandbox/io_helpers/emtf_band_setup.py
@@ -75,8 +75,8 @@ def compute_band_edges(self, decimation_factors, num_samples_window):
 
         """
         band_edges = {}
-        lower_edges = pd.Series(index=self.df.index)
-        upper_edges = pd.Series(index=self.df.index)
+        lower_edges = pd.Series(index=self.df.index, dtype="float64")
+        upper_edges = pd.Series(index=self.df.index, dtype="float64")
         if not self.sample_rate:
             print("cannot define frequencies of sample rate undefined")
             raise Exception

diff --git a/aurora/time_series/frequency_band.py b/aurora/time_series/frequency_band.py
@@ -165,6 +165,10 @@ class FrequencyBands(object):
     Context: A band is an Interval().
     FrequencyBands can be represented as an IntervalSet()
 
+    The core underlying variable is "band_edges", which is a 2D array, with one row
+    per frequency band and two columns: one for the left-hand edge (lower bound)
+    of the frequency band and one for the right-hand edge (upper bound).
+
     Using a single "band_edges" array of fenceposts is not a general solution. There
     is no reason to force the bands to be adjacent.  Therefore, will stop supporting
     band_edges 1-D array.  band_edges will need to be a 2d array.  n_bands, 2
@@ -198,8 +202,6 @@ def __init__(self, **kwargs):
         """
         self.gates = None
         self.band_edges = kwargs.get("band_edges", None)
-        # self.bands = OrderedDict()
-        # frequencies ... can repeat (log spacing)
 
     @property
     def number_of_bands(self):
@@ -341,6 +343,20 @@ def from_emtf_band_setup(
     def from_emtf_band_df(
         self, emtf_band_df, decimation_level, sample_rate, num_samples_window
     ):
+        """
+        There is some duplication of calculation here ... this definition of edges
+        from emtf_band_df is also done in
+        Parameters
+        ----------
+        emtf_band_df
+        decimation_level
+        sample_rate
+        num_samples_window
+
+        Returns
+        -------
+
+        """
         df = sample_rate / (num_samples_window)
         half_df = df / 2.0
         # half_df /=100

diff --git a/aurora/time_series/frequency_band_helpers.py b/aurora/time_series/frequency_band_helpers.py
@@ -158,6 +158,9 @@ def configure_frequency_bands(config):
 
 def df_from_bands(band_list):
     """
+    This is just a utility function that transforms a list of bands into a dataframe
+    ToDo: Make this a method of Bands()
+
     Note: The decimation_level here is +1 to agree with EMTF convention.
     Not clear this is really necessary
 
@@ -173,7 +176,13 @@ def df_from_bands(band_list):
     """
     import pandas as pd
 
-    df_columns = ["decimation_level", "lower_bound_index", "upper_bound_index"]
+    df_columns = [
+        "decimation_level",
+        "lower_bound_index",
+        "upper_bound_index",
+        "frequency_min",
+        "frequency_max",
+    ]
     n_rows = len(band_list)
     df_columns_dict = {}
     for col in df_columns:
@@ -182,6 +191,8 @@ def df_from_bands(band_list):
         df_columns_dict["decimation_level"][i_band] = band.decimation_level + 1
         df_columns_dict["lower_bound_index"][i_band] = band.index_min
         df_columns_dict["upper_bound_index"][i_band] = band.index_max
+        df_columns_dict["frequency_min"][i_band] = band.frequency_min
+        df_columns_dict["frequency_max"][i_band] = band.frequency_max
     out_df = pd.DataFrame(data=df_columns_dict)
     out_df.sort_values(by="lower_bound_index", inplace=True)
     return out_df