Deprecate create_run_processing_object
Cleanup, towards issue #150.
Also reduced the CI test matrix to a single Python version (3.8).

[Issue(s): #150]
kkappler committed Sep 9, 2022
1 parent fcf1821 commit 0f5f6f9
Showing 8 changed files with 53 additions and 43 deletions.
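
The API change at the heart of this commit, as a before/after sketch assembled from the call sites updated below. Here `kernel_dataset` stands for an already-populated KernelDataset; the band file and sample rate are illustrative values taken from the tests:

    from aurora.config import BANDS_DEFAULT_FILE
    from aurora.config.config_creator import ConfigCreator

    cc = ConfigCreator()

    # Before (now deprecated): pass the band file and sample rate by hand,
    # then pack station/run info into the config in a separate step.
    config = cc.create_run_processing_object(
        emtf_band_file=BANDS_DEFAULT_FILE, sample_rate=1.0
    )
    config.stations.from_dataset_dataframe(kernel_dataset.df)

    # After: a single call; the kernel dataset supplies stations, runs,
    # and sample rate.
    config = cc.create_from_kernel_dataset(kernel_dataset)
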
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -18,7 +18,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ["ubuntu-latest"]
-        python-version: [3.8, 3.7, 3.6]
+        python-version: [3.8,]# 3.7, 3.6]

     steps:
       - uses: actions/checkout@v2
42 changes: 36 additions & 6 deletions aurora/config/config_creator.py
@@ -6,6 +6,7 @@
 """
+from deprecated import deprecated
 from pathlib import Path

 from aurora.config import Processing, Station, Run, BANDS_DEFAULT_FILE
@@ -16,6 +17,35 @@ def __init__(self, **kwargs):
         default_config_path = Path("config")
         self.config_path = kwargs.get("config_path", default_config_path)

+    def processing_id(self, kernel_dataset):
+        """
+        In the past we used f"{local}-{remote}" or f"{local}-{run_id}".
+        Neither of these is sufficiently unique; in fact, they only describe
+        the dataset, not the processing config. It is difficult to see how to
+        make a comprehensive, unique id without it being very long or
+        involving hash functions.
+
+        For now, we will try {local}-{local_runs}-{remote}-{remote_runs},
+        which at least describes the dataset; a string generated from the
+        config can be appended later if needed.
+
+        Parameters
+        ----------
+        kernel_dataset
+
+        Returns
+        -------
+
+        """
+        id = f"{kernel_dataset.local_station_id}-{kernel_dataset.remote_station_id}"
+        return id
+
+    def _create(
+        self, input_channels=["hx", "hy"], output_channels=["hz", "ex", "ey"], **kwargs
+    ):
+        pass
+
     def create_from_kernel_dataset(
         self,
         kernel_dataset,
@@ -27,13 +57,12 @@ def create_from_kernel_dataset(
         **kwargs,
     ):
-
-        processing_id = (
-            f"{kernel_dataset.local_station_id}-{kernel_dataset.remote_station_id}"
-        )
+        processing_id = self.processing_id(kernel_dataset)
         processing_obj = Processing(id=processing_id, **kwargs)
-
         # pack station and run info into processing object
         processing_obj.stations.from_dataset_dataframe(kernel_dataset.df)
+        # runs = []
+        # PACK RUNS HERE
+
         processing_obj.set_frequency_bands(
             emtf_band_file=emtf_band_file,
             band_edges=band_edges,
@@ -50,6 +79,7 @@
             pass
         return processing_obj

+    @deprecated
     def create_run_processing_object(
         self,
         station_id=None,
@@ -71,7 +101,7 @@ def create_run_processing_object(
         Parameters
         ----------
         station_id
-        run_id
+        run_id: string or list of strings.
         mth5_path
         sample_rate
         input_channels
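
A hypothetical sketch of the fuller id that the new processing_id docstring proposes, {local}-{local_runs}-{remote}-{remote_runs}. This is not part of the commit, and the station_id/run_id column names of kernel_dataset.df are assumptions here:

    def verbose_processing_id(kernel_dataset):
        """Hypothetical: compose {local}-{local_runs}-{remote}-{remote_runs}."""
        df = kernel_dataset.df
        local = kernel_dataset.local_station_id
        remote = kernel_dataset.remote_station_id
        # join the run ids observed for each station in the dataset dataframe
        local_runs = ",".join(df[df.station_id == local].run_id.unique())
        remote_runs = ",".join(df[df.station_id == remote].run_id.unique())
        return f"{local}-{local_runs}-{remote}-{remote_runs}"
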
8 changes: 4 additions & 4 deletions aurora/config/metadata/channel_nomenclature.py
@@ -13,8 +13,8 @@
 # =============================================================================
 attr_dict = get_schema("channel_nomenclature", SCHEMA_FN_PATHS)

-
-DEFAULT_CHANNEL_MAP = {
+CHANNEL_MAPS = {}
+CHANNEL_MAPS["default"] = {
     "hx": "hx",
     "hy": "hy",
     "hz": "hz",
@@ -105,13 +105,13 @@ def keyword(self, keyword):

     def get_channel_map(self, keyword):
         if keyword == "default":
-            channel_map = DEFAULT_CHANNEL_MAP
+            channel_map = CHANNEL_MAPS["default"]
         elif keyword == "LEMI12":
             channel_map = LEMI_CHANNEL_MAP_12
         elif keyword == "LEMI34":
             channel_map = LEMI_CHANNEL_MAP_34
         elif keyword == "NIMS":
-            channel_map = DEFAULT_CHANNEL_MAP
+            channel_map = CHANNEL_MAPS["default"]
         elif keyword == "PHOENIX123":
             channel_map = PHOENIX_CHANNEL_MAP_123
         else:
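
The new CHANNEL_MAPS dict only registers the default map so far, but it suggests a direction: once every map is registered under its keyword, the if/elif chain in get_channel_map collapses to a lookup. A minimal sketch of that endpoint, assuming the remaining module-level maps are registered the same way (the registrations and the error handling are assumptions, not part of this commit):

    CHANNEL_MAPS["LEMI12"] = LEMI_CHANNEL_MAP_12
    CHANNEL_MAPS["LEMI34"] = LEMI_CHANNEL_MAP_34
    CHANNEL_MAPS["NIMS"] = CHANNEL_MAPS["default"]
    CHANNEL_MAPS["PHOENIX123"] = PHOENIX_CHANNEL_MAP_123

    def get_channel_map(self, keyword):
        try:
            return CHANNEL_MAPS[keyword]
        except KeyError:
            # mirror the else branch of the current if/elif chain
            raise NotImplementedError(f"channel nomenclature {keyword} unknown")
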
3 changes: 1 addition & 2 deletions aurora/transfer_function/kernel_dataset.py
@@ -42,8 +42,7 @@
     8. This is now a TFKernel Dataset Definition (ish). Initialize a default
     processing object and pass it this df:
     cc = ConfigCreator(config_path=CONFIG_PATH)
-    p = cc.create_run_processing_object(emtf_band_file=emtf_band_setup_file)
-    p.stations.from_dataset_dataframe(dd_df)
+    p = cc.create_from_kernel_dataset(kernel_dataset, emtf_band_file=emtf_band_setup_file)
     9. Edit the Processing appropriately,
 """
2 changes: 2 additions & 0 deletions aurora/transfer_function/weights/coherence_weights.py
@@ -87,6 +87,8 @@ def coherence_weights_v00(x, y, threshold=0.95):  # 975):#0.98

 def compute_coherence_weights(X, Y, RR, coh_type="local"):
     """
+    2022-09-09: This method is not yet supported. It needs to be made
+    tolerant of channel_nomenclature.

     Parameters
     ----------
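
One guess at what "tolerant of channel_nomenclature" could mean here: rename the incoming channels to the default nomenclature before any hard-coded indexing. Purely illustrative; the xarray-style interface and the direction of the map are assumptions, not part of this commit:

    def to_default_nomenclature(dataset, channel_map):
        # channel_map sends default names to instrument-specific ones
        # (e.g. "hx" -> the LEMI name); invert it to rename back to defaults.
        inverse = {v: k for k, v in channel_map.items()}
        present = {k: v for k, v in inverse.items() if k in dataset.data_vars}
        return dataset.rename(present)
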
15 changes: 4 additions & 11 deletions tests/cas04/02b_process_cas04_mth5.py
@@ -157,10 +157,7 @@ def process_station_runs(local_station_id, remote_station_id="", station_runs={}

     cc = ConfigCreator()
     cc = ConfigCreator(config_path=CONFIG_PATH)
-    pc = cc.create_run_processing_object(
-        emtf_band_file=BANDS_DEFAULT_FILE, sample_rate=1.0
-    )
-    pc.stations.from_dataset_dataframe(kernel_dataset.df)
+    pc = cc.create_from_kernel_dataset(kernel_dataset)
     pc.validate()
     z_file_name = tmp_station_runs.z_file_name(AURORA_RESULTS_PATH)
     tf_result = process_mth5(
@@ -322,15 +319,11 @@ def process_with_remote(local, remote, band_setup_file="band_setup_emtf_nims.txt
     kernel_dataset.drop_runs_shorter_than(15000)

     # Add a method to ensure all sampling rates are the same
-    sr = kernel_dataset.df.sample_rate.unique()
-
-    cc = ConfigCreator()  # config_path=CONFIG_PATH)
     band_setup_file = "band_setup_emtf_nims.txt"
-    # band_setup_file = BANDS_DEFAULT_FILE
-    config = cc.create_run_processing_object(
-        emtf_band_file=band_setup_file, sample_rate=sr[0]
+    cc = ConfigCreator()
+    config = cc.create_from_kernel_dataset(
+        kernel_dataset, emtf_band_file=band_setup_file
     )
-    config.stations.from_dataset_dataframe(kernel_dataset.df)
     for decimation in config.decimations:
         decimation.window.type = "hamming"
     show_plot = False
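
The "Add a method to ensure all sampling rates are the same" comment survives the cleanup; a sketch of such a guard, reusing the df.sample_rate access that the deleted sr = ... line relied on (the method itself is not part of this commit):

    def single_sample_rate(kernel_dataset):
        # collapse the per-run sample rates; anything other than exactly
        # one distinct value means the dataset is not processable as-is
        rates = kernel_dataset.df.sample_rate.unique()
        if len(rates) != 1:
            raise ValueError(f"expected one sample rate, got {rates}")
        return rates[0]
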
7 changes: 1 addition & 6 deletions tests/parkfield/test_process_parkfield_run.py
@@ -47,12 +47,7 @@ def test_processing(return_collection=False, z_file_path=None, test_clock_zero=F
     tfk_dataset.from_run_summary(run_summary, "PKD")

     cc = ConfigCreator(config_path=CONFIG_PATH)
-    # config = cc.create_from_kernel_dataset(tfk_dataset, estimator={"engine": "RME"})
-    config = cc.create_run_processing_object(
-        emtf_band_file=BANDS_DEFAULT_FILE,
-        sample_rate=tfk_dataset.sample_rate,
-        estimator={"engine": "RME"},
-    )
+    config = cc.create_from_kernel_dataset(tfk_dataset, estimator={"engine": "RME"})

     if test_clock_zero:
         for dec_lvl_cfg in config.decimations:
17 changes: 4 additions & 13 deletions tests/synthetic/test_multi_run.py
@@ -26,13 +26,9 @@ def test_each_run_individually():
     ]
     keep_or_drop = "keep"
     kernel_dataset.select_station_runs(station_runs_dict, keep_or_drop)
-    print(kernel_dataset.df)
     cc = ConfigCreator()
-    sample_rate = kernel_dataset.df.sample_rate.iloc[0]
-    config = cc.create_run_processing_object(
-        emtf_band_file=BANDS_DEFAULT_FILE, sample_rate=sample_rate
-    )
-    config.stations.from_dataset_dataframe(kernel_dataset.df)
+    config = cc.create_from_kernel_dataset(kernel_dataset)
+
     for decimation in config.decimations:
         decimation.estimator.engine = "RME"
     show_plot = False  # True
@@ -62,13 +58,8 @@ def test_all_runs():
     kernel_dataset = KernelDataset()
     kernel_dataset.from_run_summary(run_summary, "test3")
     cc = ConfigCreator()
-    sample_rate = kernel_dataset.df.sample_rate.iloc[0]
-    config = cc.create_run_processing_object(
-        emtf_band_file=BANDS_DEFAULT_FILE, sample_rate=sample_rate
-    )
-    config.stations.from_dataset_dataframe(kernel_dataset.df)
-    for decimation in config.decimations:
-        decimation.estimator.engine = "RME"
+    config = cc.create_from_kernel_dataset(kernel_dataset, estimator={"engine": "RME"})
+
     show_plot = False  # True
     z_file_path = AURORA_RESULTS_PATH.joinpath("syn3_all.zss")
     tf_cls = process_mth5(
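
Note the two equivalent ways these tests now configure the RME estimator: test_all_runs passes it as a keyword at creation time, while test_each_run_individually still sets it per decimation level after creation. Side by side:

    # at creation time
    config = cc.create_from_kernel_dataset(kernel_dataset, estimator={"engine": "RME"})

    # or per decimation level, after creation
    config = cc.create_from_kernel_dataset(kernel_dataset)
    for decimation in config.decimations:
        decimation.estimator.engine = "RME"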
