initial commit
geojunky committed Sep 24, 2024
1 parent 9f3eb14 commit 15c5e04
Showing 9 changed files with 546 additions and 30 deletions.
Empty file added seismic/catalogue/__init__.py
180 changes: 180 additions & 0 deletions seismic/catalogue/gcmt.py
@@ -0,0 +1,180 @@
import numpy as np
import pandas as pd
from pandas.core.series import Series
from scipy.spatial import cKDTree
from pyproj import Geod
from obspy.core import UTCDateTime
from collections import defaultdict
from obspy.geodetics.base import degrees2kilometers
from seismic.misc import rtp2xyz

def azimuth_difference(a, b, p, ellipse='WGS84'):
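    # Returns a measure, in degrees, of how far point P lies off the azimuth of the
    # geodesic joining points A and B; inputs are (lon, lat) pairs in degrees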
geod = Geod(ellps=ellipse)

# ab Geodesic from A to B.
# az_ab Azimuth of geodesic from A to B.
az_ab, _, _ = geod.inv(a[0], a[1], b[0], b[1])

# ap Geodesic from A to P.
# az_ap Azimuth of geodesic from A to P.
az_ap, _, d_ap = geod.inv(a[0], a[1], p[0], p[1])

# bp Geodesic from B to P.
# az_bp Azimuth of geodesic from B to P.
az_bp, _, d_bp = geod.inv(b[0], b[1], p[0], p[1])

if(d_ap > d_bp):
return np.min(np.array([np.fabs(az_ab - az_ap), np.fabs(az_ab-az_bp)]))
else:
        return azimuth_difference(b, a, p, ellipse=ellipse)
# end if
# end func

class GCMTCatalog:
def __init__(self, fn, ellipse='WGS84'):
def read_gcmt_catalog(fn):
"""
@param fn: GCMT catalog file name in text format. The expected columns (space-separated) are:
lonp lon lat dep mrr mtt mff mrt mrf mtf exp EventOrigintim DC CLVD VOL Mw str1 dip1 rak1 \
str2 dip2 rak2 plunP azP plunT azT Hlonp Hlon Hlat Hdep Ctime Chdur MwG base Bst Bco Bmp Bper \
Sst Sco Smp Sper Mst Mco Mmp Mper
@return: catalog in a pandas dataframe
"""
def convert_to_timestamp(eotime):
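                # eotime is expected as a compact timestamp of the form YYYYMMDDhhmmss[.s]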
a = str(eotime)
b = a[0:4] + '-' + a[4:6] + '-' + a[6:8] + 'T' + a[8:10] + ':' + a[10:12] + ':' + a[12:]
return UTCDateTime(b).timestamp

# end func

            cat = pd.read_csv(fn, header=[1], delimiter=r'\s+')
cat['EventOrigintim'] = cat['EventOrigintim'].map(convert_to_timestamp)

return cat
# end func

self.fn = fn
self.cat = read_gcmt_catalog(fn)
self.EARTH_RADIUS_KM = 6371.
self.tree = None
self.geod = Geod(ellps=ellipse)
# end func

def get_compatible_events(self, station_lon1, station_lat1,
station_lon2, station_lat2,
max_areal_separation_km=15,
max_depth_separation_km=10,
min_magnitude=-1,
az_range=80, min_event_dist_deg=10,
max_event_dist_deg=100,
max_mt_angle=15):
"""
        For a given pair of stations, finds pairs of proximal earthquakes that meet the given
        criteria for event-pair proximity, epicentral distance, azimuth and magnitude.
@param station_lon1: longitude of station 1
@param station_lat1: latitude of station 1
@param station_lon2: longitude of station 2
@param station_lat2: latitude of station 2
@param max_areal_separation_km: maximum areal separation of event-pair
@param max_depth_separation_km: maximum separation of event-pair in depth
@param min_magnitude: minimum magnitude of earthquakes
        @param az_range: half-width (+/-), in degrees, of the azimuth window centred on the
                         inter-station azimuth/back-azimuth within which event azimuths must fall
@param min_event_dist_deg: minimum distance of event epicentres from either station in degrees
@param max_event_dist_deg: maximum distance of event epicentres from either station in degrees
@param max_mt_angle: maximum moment-tensor angle between event-pairs
@return: dictionary indexed by a pair of event IDs, with the moment-tensor angle between
them as the value
"""

        # create a kd-tree for spatial queries, if not already done
        if(self.tree is None):
r = np.ones(len(self.cat)) * self.EARTH_RADIUS_KM
t = np.radians(90 - self.cat['lat'])
p = np.radians(self.cat['lon'])

xyz = rtp2xyz(r, t, p)
self.tree = cKDTree(xyz)
# end if

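        # convert the distance limits from degrees to metres, to match the distances returned by Geod.inv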
MIN_DIST = degrees2kilometers(min_event_dist_deg) * 1e3
MAX_DIST = degrees2kilometers(max_event_dist_deg) * 1e3

p1 = [station_lon1, station_lat1]
p2 = [station_lon2, station_lat2]
az, baz, dist = self.geod.inv(p1[0], p1[1], p2[0], p2[1])

eaz1, ebaz1, edist1 = self.geod.inv(np.ones(len(self.cat)) * p1[0],
np.ones(len(self.cat)) * p1[1],
self.cat['lon'], self.cat['lat'])
eaz2, ebaz2, edist2 = self.geod.inv(np.ones(len(self.cat)) * p2[0],
np.ones(len(self.cat)) * p2[1],
self.cat['lon'], self.cat['lat'])

# find event IDs that match the given criteria
good_ids = ((eaz1 > (baz - az_range)) & (eaz1 < (baz + az_range))) | \
((eaz2 > (az - az_range)) & (eaz2 < (az + az_range)))
good_ids &= ((edist1 >= MIN_DIST) & (edist1 <= MAX_DIST)) & \
((edist2 >= MIN_DIST) & (edist2 <= MAX_DIST))
good_ids &= (self.cat['Mw'] >= min_magnitude)

# find all proximal events
qr = np.ones(np.sum(good_ids)) * self.EARTH_RADIUS_KM
qt = np.radians(90 - self.cat['lat'][good_ids])
qp = np.radians(self.cat['lon'][good_ids])

qxyz = rtp2xyz(qr, qt, qp)
id_lists = self.tree.query_ball_point(qxyz, max_areal_separation_km)

result = defaultdict(list)
# find angles between all proximal events
for ids in id_lists:
if (len(ids) < 2): continue

ids = np.array(ids)
# drop events by magnitude
ids = ids[(self.cat['Mw'][ids] >= min_magnitude)]

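            # columns 4:10 hold the six moment-tensor components (mrr, mtt, mff, mrt, mrf, mtf);
            # each tensor is normalized to a unit 6-vector and pairwise angles are computed via
            # arccos of their dot products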
gmat = np.array(self.cat.iloc[ids, 4:10]).T
gmat_norm = np.linalg.norm(gmat, axis=0)

gmat /= gmat_norm
angles = np.arccos(np.clip(np.matmul(gmat.T, gmat), -1, 1))

mask = np.zeros(angles.shape)
mask[np.mask_indices(angles.shape[0], np.tril)] = 1
angles = np.degrees(np.ma.masked_array(angles, mask=mask))

s_ids = np.argsort(angles.flatten())
s_ids_i, s_ids_j = np.unravel_index(s_ids, angles.shape)

for i, j in zip(s_ids_i, s_ids_j):
if (not np.ma.is_masked(angles[i, j])):
# drop events by depth-difference limit
depth_difference = np.fabs(self.cat['dep'][ids[i]] - self.cat['dep'][ids[j]])
if(depth_difference > max_depth_separation_km): continue

if(angles[i, j] > max_mt_angle): break
result[(ids[i], ids[j])] = angles[i, j]
# end if
# end for
# end for

return result
# end func

@staticmethod
def get_mt_angle(e1:Series, e2:Series):
"""
@param e1: an event row from a GCMTCatalog instance
@param e2: an event row from a GCMTCatalog instance
@return: moment-tensor angle between the two events
"""

        mt_angle = np.degrees(np.arccos(np.clip(np.dot(e1.iloc[4:10], e2.iloc[4:10]) /
                                                (np.linalg.norm(e1.iloc[4:10]) *
                                                 np.linalg.norm(e2.iloc[4:10])),
                                                -1., 1.)))
return mt_angle
# end func
# end class
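
A minimal usage sketch for the GCMTCatalog class above (illustrative only, not part of the commit; the catalogue path, station coordinates and thresholds are assumed values):

from seismic.catalogue.gcmt import GCMTCatalog

# hypothetical GCMT catalogue file in the space-separated text format described above
gcmt = GCMTCatalog('gcmt_catalog.txt')

# event pairs compatible with a hypothetical station pair, keyed by (event_id1, event_id2)
pairs = gcmt.get_compatible_events(station_lon1=133.0, station_lat1=-23.7,
                                   station_lon2=136.5, station_lat2=-30.5,
                                   min_magnitude=6, max_mt_angle=15)
for (eid1, eid2), mt_angle in pairs.items():
    print(eid1, eid2, mt_angle)
# end for

# moment-tensor angle between two individual catalogue rows
angle = GCMTCatalog.get_mt_angle(gcmt.cat.iloc[0], gcmt.cat.iloc[1])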
44 changes: 44 additions & 0 deletions seismic/misc.py
@@ -86,6 +86,50 @@ def rtp2xyz(r, theta, phi):
return xout
# end func

def read_key_value_pairs(file_path: str, keys: list, strict: bool = False) -> dict:
    """
    Reads a text file containing colon-separated key-value pairs and returns a dictionary with
    values for the specified keys.
    :param file_path: Path to the text file.
    :param keys: A list of keys to search for in the file.
    :param strict: If True, require that all of the specified keys are present in the file.
    :return: A dictionary with the specified keys and their corresponding values.
    :raises ValueError: If strict is True and any of the specified keys are not found in the file.
"""
result = {}
keys_found = set()

    with open(file_path, 'r') as f:
        for line in f:
            # Split the line by colon to get the key-value pair
            if ':' in line:
                key, value = line.strip().split(':', 1)
                key, value = key.strip(), value.strip()  # Clean up extra spaces
                # If the key is in the provided list, add it to the result
                if key in keys:
                    result[key] = value
                    keys_found.add(key)
                # end if
            # end if
        # end for
    # end with

    # In strict mode, raise if any of the requested keys were not found
    missing_keys = set(keys) - keys_found
    if strict and missing_keys:
        raise ValueError(f"The following keys were not found in the file: {', '.join(missing_keys)}")
    # end if

return result
# end func
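# Illustrative usage (a sketch, not part of this commit): given a file 'params.conf' containing
# lines such as "CMT_CATALOG_PATH: /path/to/catalog" and "AZ_TOL: 30", one might call:
#     params = read_key_value_pairs('params.conf', ['CMT_CATALOG_PATH', 'AZ_TOL'], strict=True)
#     # -> {'CMT_CATALOG_PATH': '/path/to/catalog', 'AZ_TOL': '30'}  (values are returned as strings)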

def print_exception(e: Exception):
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
57 changes: 39 additions & 18 deletions seismic/xcorqc/correlator.py
@@ -43,10 +43,9 @@ def process(data_source1, data_source2, output_path,
start_time='1970-01-01T00:00:00', end_time='2100-01-01T00:00:00',
instrument_response_inventory=None, instrument_response_output='vel', water_level=50,
clip_to_2std=False, whitening=False, whitening_window_frequency=0,
one_bit_normalize=False, location_preferences=None,
ds1_zchan=None, ds1_nchan=None, ds1_echan=None,
ds2_zchan=None, ds2_nchan=None, ds2_echan=None, corr_chan=None,
envelope_normalize=False, ensemble_stack=False, apply_stacking=True,
one_bit_normalize=False, location_preferences=None, ds1_zchan=None, ds1_nchan=None,
ds1_echan=None, ds2_zchan=None, ds2_nchan=None, ds2_echan=None, corr_chan=None,
envelope_normalize=False, ensemble_stack=False, subset_stack=False, apply_simple_stacking=True,
restart=False, dry_run=False, no_tracking_tag=False, scratch_folder=None):
"""
:param data_source1: Text file containing paths to ASDF files
@@ -120,6 +119,7 @@ def outputConfigParameters():
if(whitening):
f.write('%35s\t\t\t: %s\n' % ('--whitening-window-frequency', whitening_window_frequency))
f.write('%35s\t\t\t: %s\n' % ('--ensemble-stack', ensemble_stack))
f.write('%35s\t\t\t: %s\n' % ('--subset-stack', subset_stack))
f.write('%35s\t\t\t: %s\n' % ('--restart', 'TRUE' if restart else 'FALSE'))
f.write('%35s\t\t\t: %s\n' % ('--no-tracking-tag', 'TRUE' if no_tracking_tag else 'FALSE'))
f.write('%35s\t\t\t: %s\n' % ('--scratch-folder', scratch_folder))
@@ -309,8 +309,8 @@ def get_loccha(cha1, cha2):
interval_seconds, window_seconds, window_overlap,
window_buffer_length, fmin, fmax, clip_to_2std, whitening,
whitening_window_frequency, one_bit_normalize, envelope_normalize,
ensemble_stack, apply_stacking, output_path, 2, time_tag,
scratch_folder, git_hash)
ensemble_stack, subset_stack, apply_simple_stacking, output_path, 2,
time_tag, scratch_folder, git_hash)
# end for
# end for
# end func
@@ -319,9 +319,9 @@
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'], show_default=True)
@click.command(context_settings=CONTEXT_SETTINGS)
@click.argument('data-source1',
type=click.Path('r'))
type=click.Path(exists=True))
@click.argument('data-source2',
type=click.Path('r'))
type=click.Path(exists=True))
@click.argument('output-path', required=True,
type=click.Path(exists=True))
@click.argument('window-seconds', required=True,
@@ -363,7 +363,7 @@ def get_loccha(cha1, cha2):
@click.option('--station-names2', default='*', type=str,
help="Either station name(s) (space-delimited) or a text file containing NET.STA entries in each line to "
"process in data-source-2; default is '*', which processes all available stations.")
@click.option('--pairs-to-compute', default=None, type=click.Path('r'),
@click.option('--pairs-to-compute', default=None, type=click.Path(exists=True),
help="Text file containing station pairs (NET.STA.NET.STA) for which cross-correlations are to be computed."
"Note that this parameter is intended as a way to restrict the number of computations to only the "
"station-pairs listed in the text-file.")
@@ -374,7 +374,7 @@
type=str,
help="Date and time (in UTC format) to stop at")
@click.option('--instrument-response-inventory', default=None,
type=click.Path('r'),
type=click.Path(exists=True),
help="FDSNxml inventory containing instrument response information. Note that when this parameter is provided, "
"instrument response corrections are automatically applied for matching stations with response "
"information.")
@@ -401,7 +401,7 @@
help="Apply one-bit normalization to data in each window. Note that the default time-domain normalization "
"is N(0,1), i.e. 0-mean and unit variance")
@click.option('--location-preferences', default=None,
type=click.Path('r'),
type=click.Path(exists=True),
help="A comma-separated two-columned text file containing location code preferences for "
"stations in the form: 'NET.STA, LOC'. Note that location code preferences need not "
"be provided for all stations -- the default is None. This approach allows for "
@@ -439,6 +439,20 @@ def get_loccha(cha1, cha2):
"for a given station-pair. In other words, stacks over "
"'interval-seconds' are in turn stacked to produce a "
"single cross-correlation function")
@click.option('--subset-stack', is_flag=True,
help="Outputs a number of stacks of cross-correlation subsets as identified based on the presence "
"or absence of azimuthal earthquake energy within cross-correlation windows, as outlined in "
"Hejrani et al. (in prep.). Note that this option is not compatible with --ensemble-stack and "
"--stacking-interval-seconds. A file named subset_stack.conf is expected in the current working "
"folder, with appropriate colon-separated values for the keys below: "
"CMT_CATALOG_PATH: 'path/to/CMT/catalog'"
"SW_VMIN: 2"
"SW_VMAX: 5.5"
"DIST_MIN: 0"
"DIST_MAX: 999"
"EMAG_MIN: 6"
"EMAG_MAX: 999"
"AZ_TOL: 30")
@click.option('--restart', default=False, is_flag=True, help='Restart job')
@click.option('--dry-run', default=False, is_flag=True, help='Dry run for printing out station-pairs and '
'additional stats.')
@@ -451,7 +465,7 @@ def main(data_source1, data_source2, output_path, window_seconds, window_overlap
start_time, end_time, instrument_response_inventory, instrument_response_output, water_level,
clip_to_2std, whitening, whitening_window_frequency, one_bit_normalize, location_preferences,
ds1_zchan, ds1_nchan, ds1_echan, ds2_zchan, ds2_nchan, ds2_echan, corr_chan, envelope_normalize,
ensemble_stack, restart, dry_run, no_tracking_tag, scratch_folder):
ensemble_stack, subset_stack, restart, dry_run, no_tracking_tag, scratch_folder):
"""
DATA_SOURCE1: Text file containing paths to ASDF files \n
DATA_SOURCE2: Text file containing paths to ASDF files \n
@@ -476,11 +490,16 @@ def main(data_source1, data_source2, output_path, window_seconds, window_overlap
if(stacking_interval_seconds is not None):
if(stacking_interval_seconds < window_seconds):
raise ValueError('Invalid value for --stacking-interval-seconds, must be > WINDOW_SECONDS')
if (stacking_interval_seconds > window_seconds * read_ahead_windows):
if(stacking_interval_seconds > window_seconds * read_ahead_windows):
raise ValueError("""Invalid value for --stacking-interval-seconds, \
must be < WINDOW_SECONDS*READ_AHEAD_WINDOWS""")
# end if

if(subset_stack): raise ValueError('Subset-stacking based on a GCMT catalog is incompatible with stacking over '
'fixed intervals, e.g. over 24 hrs, as done through --stacking-interval-seconds')
# end if
if(ensemble_stack and subset_stack):
raise ValueError('Ensemble-stacking and subset-stacking are mutually exclusive options')
# end if

#######################################################
# Compute amount of data to be read in in each IO call,
@@ -489,7 +508,7 @@ def main(data_source1, data_source2, output_path, window_seconds, window_overlap
#######################################################
interval_seconds = None
read_ahead_window_seconds = None
apply_stacking = None
apply_simple_stacking = False
if(stacking_interval_seconds is None):
if(ensemble_stack):
raise ValueError('--ensemble-stack is only applicable with --stacking-interval-seconds. Aborting..')
@@ -500,12 +519,13 @@ def main(data_source1, data_source2, output_path, window_seconds, window_overlap
window_seconds * window_buffer_length * 2 + \
window_overlap * window_seconds * 2
interval_seconds = read_ahead_window_seconds
apply_stacking = False
apply_simple_stacking = False

#print(read_ahead_window_seconds)
else:
read_ahead_window_seconds = window_seconds * read_ahead_windows
interval_seconds = stacking_interval_seconds
apply_stacking = True
apply_simple_stacking = True
# end if

process(data_source1, data_source2, output_path, interval_seconds, window_seconds, window_overlap,
@@ -514,7 +534,8 @@ def main(data_source1, data_source2, output_path, window_seconds, window_overlap
start_time, end_time, instrument_response_inventory, instrument_response_output, water_level,
clip_to_2std, whitening, whitening_window_frequency, one_bit_normalize, location_preferences,
ds1_zchan, ds1_nchan, ds1_echan, ds2_zchan, ds2_nchan, ds2_echan, corr_chan, envelope_normalize,
ensemble_stack, apply_stacking, restart, dry_run, no_tracking_tag, scratch_folder)
ensemble_stack, subset_stack, apply_simple_stacking, restart, dry_run, no_tracking_tag,
scratch_folder)
# end func

if __name__ == '__main__':