initial commit
geojunky committed Sep 24, 2024
1 parent 9f3eb14 commit 15c5e04
Showing 9 changed files with 546 additions and 30 deletions.
Empty file added seismic/catalogue/__init__.py
180 changes: 180 additions & 0 deletions seismic/catalogue/gcmt.py
@@ -0,0 +1,180 @@
import numpy as np
import pandas as pd
from pandas.core.series import Series
from scipy.spatial import cKDTree
from pyproj import Geod
from obspy.core import UTCDateTime
from collections import defaultdict
from obspy.geodetics.base import degrees2kilometers
from seismic.misc import rtp2xyz

def azimuth_difference(a, b, p, ellipse='WGS84'):
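    # Returns a measure, in degrees, of how far point P lies off the azimuth of the
    # geodesic joining points A and B; inputs are (lon, lat) pairs in degrees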
geod = Geod(ellps=ellipse)

# ab Geodesic from A to B.
# az_ab Azimuth of geodesic from A to B.
az_ab, _, _ = geod.inv(a[0], a[1], b[0], b[1])

# ap Geodesic from A to P.
# az_ap Azimuth of geodesic from A to P.
az_ap, _, d_ap = geod.inv(a[0], a[1], p[0], p[1])

# bp Geodesic from B to P.
# az_bp Azimuth of geodesic from B to P.
az_bp, _, d_bp = geod.inv(b[0], b[1], p[0], p[1])

if(d_ap > d_bp):
return np.min(np.array([np.fabs(az_ab - az_ap), np.fabs(az_ab-az_bp)]))
else:
        return azimuth_difference(b, a, p, ellipse=ellipse)
# end if
# end func

class GCMTCatalog:
def __init__(self, fn, ellipse='WGS84'):
def read_gcmt_catalog(fn):
"""
@param fn: GCMT catalog file name in text format. The expected columns (space-separated) are:
lonp lon lat dep mrr mtt mff mrt mrf mtf exp EventOrigintim DC CLVD VOL Mw str1 dip1 rak1 \
str2 dip2 rak2 plunP azP plunT azT Hlonp Hlon Hlat Hdep Ctime Chdur MwG base Bst Bco Bmp Bper \
Sst Sco Smp Sper Mst Mco Mmp Mper
@return: catalog in a pandas dataframe
"""
def convert_to_timestamp(eotime):
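                # eotime is expected as a compact timestamp of the form YYYYMMDDhhmmss[.s]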
a = str(eotime)
b = a[0:4] + '-' + a[4:6] + '-' + a[6:8] + 'T' + a[8:10] + ':' + a[10:12] + ':' + a[12:]
return UTCDateTime(b).timestamp

# end func

            cat = pd.read_csv(fn, header=[1], delimiter=r'\s+')
cat['EventOrigintim'] = cat['EventOrigintim'].map(convert_to_timestamp)

return cat
# end func

self.fn = fn
self.cat = read_gcmt_catalog(fn)
self.EARTH_RADIUS_KM = 6371.
self.tree = None
self.geod = Geod(ellps=ellipse)
# end func

def get_compatible_events(self, station_lon1, station_lat1,
station_lon2, station_lat2,
max_areal_separation_km=15,
max_depth_separation_km=10,
min_magnitude=-1,
az_range=80, min_event_dist_deg=10,
max_event_dist_deg=100,
max_mt_angle=15):
"""
        For a given pair of stations, finds pairs of proximal earthquakes that meet the given
        criteria for event-pair proximity, epicentral distance, azimuth and magnitude.
@param station_lon1: longitude of station 1
@param station_lat1: latitude of station 1
@param station_lon2: longitude of station 2
@param station_lat2: latitude of station 2
@param max_areal_separation_km: maximum areal separation of event-pair
@param max_depth_separation_km: maximum separation of event-pair in depth
@param min_magnitude: minimum magnitude of earthquakes
        @param az_range: half-width (+/-), in degrees, of the azimuth window centred on the
                         inter-station azimuth/back-azimuth within which event azimuths must fall
@param min_event_dist_deg: minimum distance of event epicentres from either station in degrees
@param max_event_dist_deg: maximum distance of event epicentres from either station in degrees
@param max_mt_angle: maximum moment-tensor angle between event-pairs
@return: dictionary indexed by a pair of event IDs, with the moment-tensor angle between
them as the value
"""

        # create a kd-tree for spatial queries, if not already done
        if(self.tree is None):
r = np.ones(len(self.cat)) * self.EARTH_RADIUS_KM
t = np.radians(90 - self.cat['lat'])
p = np.radians(self.cat['lon'])

xyz = rtp2xyz(r, t, p)
self.tree = cKDTree(xyz)
# end if

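        # convert the distance limits from degrees to metres, to match the distances returned by Geod.inv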
MIN_DIST = degrees2kilometers(min_event_dist_deg) * 1e3
MAX_DIST = degrees2kilometers(max_event_dist_deg) * 1e3

p1 = [station_lon1, station_lat1]
p2 = [station_lon2, station_lat2]
az, baz, dist = self.geod.inv(p1[0], p1[1], p2[0], p2[1])

eaz1, ebaz1, edist1 = self.geod.inv(np.ones(len(self.cat)) * p1[0],
np.ones(len(self.cat)) * p1[1],
self.cat['lon'], self.cat['lat'])
eaz2, ebaz2, edist2 = self.geod.inv(np.ones(len(self.cat)) * p2[0],
np.ones(len(self.cat)) * p2[1],
self.cat['lon'], self.cat['lat'])

# find event IDs that match the given criteria
good_ids = ((eaz1 > (baz - az_range)) & (eaz1 < (baz + az_range))) | \
((eaz2 > (az - az_range)) & (eaz2 < (az + az_range)))
good_ids &= ((edist1 >= MIN_DIST) & (edist1 <= MAX_DIST)) & \
((edist2 >= MIN_DIST) & (edist2 <= MAX_DIST))
good_ids &= (self.cat['Mw'] >= min_magnitude)

# find all proximal events
qr = np.ones(np.sum(good_ids)) * self.EARTH_RADIUS_KM
qt = np.radians(90 - self.cat['lat'][good_ids])
qp = np.radians(self.cat['lon'][good_ids])

qxyz = rtp2xyz(qr, qt, qp)
id_lists = self.tree.query_ball_point(qxyz, max_areal_separation_km)

result = defaultdict(list)
# find angles between all proximal events
for ids in id_lists:
if (len(ids) < 2): continue

ids = np.array(ids)
# drop events by magnitude
ids = ids[(self.cat['Mw'][ids] >= min_magnitude)]

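            # columns 4:10 hold the six moment-tensor components (mrr, mtt, mff, mrt, mrf, mtf);
            # each tensor is normalized to a unit 6-vector and pairwise angles are computed via
            # arccos of their dot products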
gmat = np.array(self.cat.iloc[ids, 4:10]).T
gmat_norm = np.linalg.norm(gmat, axis=0)

gmat /= gmat_norm
angles = np.arccos(np.clip(np.matmul(gmat.T, gmat), -1, 1))

mask = np.zeros(angles.shape)
mask[np.mask_indices(angles.shape[0], np.tril)] = 1
angles = np.degrees(np.ma.masked_array(angles, mask=mask))

s_ids = np.argsort(angles.flatten())
s_ids_i, s_ids_j = np.unravel_index(s_ids, angles.shape)

for i, j in zip(s_ids_i, s_ids_j):
if (not np.ma.is_masked(angles[i, j])):
# drop events by depth-difference limit
depth_difference = np.fabs(self.cat['dep'][ids[i]] - self.cat['dep'][ids[j]])
if(depth_difference > max_depth_separation_km): continue

if(angles[i, j] > max_mt_angle): break
result[(ids[i], ids[j])] = angles[i, j]
# end if
# end for
# end for

return result
# end func

@staticmethod
def get_mt_angle(e1:Series, e2:Series):
"""
@param e1: an event row from a GCMTCatalog instance
@param e2: an event row from a GCMTCatalog instance
@return: moment-tensor angle between the two events
"""

        mt_angle = np.degrees(np.arccos(np.clip(np.dot(e1.iloc[4:10], e2.iloc[4:10]) /
                                                (np.linalg.norm(e1.iloc[4:10]) *
                                                 np.linalg.norm(e2.iloc[4:10])),
                                                -1., 1.)))
return mt_angle
# end func
# end class
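
A minimal usage sketch for the GCMTCatalog class above (illustrative only, not part of the commit; the catalogue path, station coordinates and thresholds are assumed values):

from seismic.catalogue.gcmt import GCMTCatalog

# hypothetical GCMT catalogue file in the space-separated text format described above
gcmt = GCMTCatalog('gcmt_catalog.txt')

# event pairs compatible with a hypothetical station pair, keyed by (event_id1, event_id2)
pairs = gcmt.get_compatible_events(station_lon1=133.0, station_lat1=-23.7,
                                   station_lon2=136.5, station_lat2=-30.5,
                                   min_magnitude=6, max_mt_angle=15)
for (eid1, eid2), mt_angle in pairs.items():
    print(eid1, eid2, mt_angle)
# end for

# moment-tensor angle between two individual catalogue rows
angle = GCMTCatalog.get_mt_angle(gcmt.cat.iloc[0], gcmt.cat.iloc[1])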
44 changes: 44 additions & 0 deletions seismic/misc.py
@@ -86,6 +86,50 @@ def rtp2xyz(r, theta, phi):
return xout
# end func

def read_key_value_pairs(file_path: str, keys: list, strict: bool = False) -> dict:
    """
    Reads a text file containing colon-separated key-value pairs and returns a dictionary with
    values for the specified keys.
    :param file_path: Path to the text file.
    :param keys: A list of keys to search for in the file.
    :param strict: If True, require that all of the specified keys are present in the file.
    :return: A dictionary with the specified keys and their corresponding values.
    :raises ValueError: If strict is True and any of the specified keys are not found in the file.
"""
result = {}
keys_found = set()

    with open(file_path, 'r') as f:
        for line in f:
            # Split the line by colon to get the key-value pair
            if ':' in line:
                key, value = line.strip().split(':', 1)
                key, value = key.strip(), value.strip()  # Clean up extra spaces
                # If the key is in the provided list, add it to the result
                if key in keys:
                    result[key] = value
                    keys_found.add(key)
                # end if
            # end if
        # end for
    # end with

    # In strict mode, raise if any of the requested keys were not found
    missing_keys = set(keys) - keys_found
    if strict and missing_keys:
        raise ValueError(f"The following keys were not found in the file: {', '.join(missing_keys)}")
    # end if

return result
# end func
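# Illustrative usage (a sketch, not part of this commit): given a file 'params.conf' containing
# lines such as "CMT_CATALOG_PATH: /path/to/catalog" and "AZ_TOL: 30", one might call:
#     params = read_key_value_pairs('params.conf', ['CMT_CATALOG_PATH', 'AZ_TOL'], strict=True)
#     # -> {'CMT_CATALOG_PATH': '/path/to/catalog', 'AZ_TOL': '30'}  (values are returned as strings)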

def print_exception(e: Exception):
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
57 changes: 39 additions & 18 deletions seismic/xcorqc/correlator.py
@@ -43,10 +43,9 @@ def process(data_source1, data_source2, output_path,
start_time='1970-01-01T00:00:00', end_time='2100-01-01T00:00:00',
instrument_response_inventory=None, instrument_response_output='vel', water_level=50,
clip_to_2std=False, whitening=False, whitening_window_frequency=0,
one_bit_normalize=False, location_preferences=None,
ds1_zchan=None, ds1_nchan=None, ds1_echan=None,
ds2_zchan=None, ds2_nchan=None, ds2_echan=None, corr_chan=None,
envelope_normalize=False, ensemble_stack=False, apply_stacking=True,
one_bit_normalize=False, location_preferences=None, ds1_zchan=None, ds1_nchan=None,
ds1_echan=None, ds2_zchan=None, ds2_nchan=None, ds2_echan=None, corr_chan=None,
envelope_normalize=False, ensemble_stack=False, subset_stack=False, apply_simple_stacking=True,
restart=False, dry_run=False, no_tracking_tag=False, scratch_folder=None):
"""
:param data_source1: Text file containing paths to ASDF files
@@ -120,6 +119,7 @@ def outputConfigParameters():
if(whitening):
f.write('%35s\t\t\t: %s\n' % ('--whitening-window-frequency', whitening_window_frequency))
f.write('%35s\t\t\t: %s\n' % ('--ensemble-stack', ensemble_stack))
f.write('%35s\t\t\t: %s\n' % ('--subset-stack', subset_stack))
f.write('%35s\t\t\t: %s\n' % ('--restart', 'TRUE' if restart else 'FALSE'))
f.write('%35s\t\t\t: %s\n' % ('--no-tracking-tag', 'TRUE' if no_tracking_tag else 'FALSE'))
f.write('%35s\t\t\t: %s\n' % ('--scratch-folder', scratch_folder))
@@ -309,8 +309,8 @@ def get_loccha(cha1, cha2):
interval_seconds, window_seconds, window_overlap,
window_buffer_length, fmin, fmax, clip_to_2std, whitening,
whitening_window_frequency, one_bit_normalize, envelope_normalize,
ensemble_stack, apply_stacking, output_path, 2, time_tag,
scratch_folder, git_hash)
ensemble_stack, subset_stack, apply_simple_stacking, output_path, 2,
time_tag, scratch_folder, git_hash)
# end for
# end for
# end func
@@ -319,9 +319,9 @@
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'], show_default=True)
@click.command(context_settings=CONTEXT_SETTINGS)
@click.argument('data-source1',
type=click.Path('r'))
type=click.Path(exists=True))
@click.argument('data-source2',
type=click.Path('r'))
type=click.Path(exists=True))
@click.argument('output-path', required=True,
type=click.Path(exists=True))
@click.argument('window-seconds', required=True,
@@ -363,7 +363,7 @@ def get_loccha(cha1, cha2):
@click.option('--station-names2', default='*', type=str,
help="Either station name(s) (space-delimited) or a text file containing NET.STA entries in each line to "
"process in data-source-2; default is '*', which processes all available stations.")
@click.option('--pairs-to-compute', default=None, type=click.Path('r'),
@click.option('--pairs-to-compute', default=None, type=click.Path(exists=True),
help="Text file containing station pairs (NET.STA.NET.STA) for which cross-correlations are to be computed."
"Note that this parameter is intended as a way to restrict the number of computations to only the "
"station-pairs listed in the text-file.")
@@ -374,7 +374,7 @@
type=str,
help="Date and time (in UTC format) to stop at")
@click.option('--instrument-response-inventory', default=None,
type=click.Path('r'),
type=click.Path(exists=True),
help="FDSNxml inventory containing instrument response information. Note that when this parameter is provided, "
"instrument response corrections are automatically applied for matching stations with response "
"information.")
@@ -401,7 +401,7 @@
help="Apply one-bit normalization to data in each window. Note that the default time-domain normalization "
"is N(0,1), i.e. 0-mean and unit variance")
@click.option('--location-preferences', default=None,
type=click.Path('r'),
type=click.Path(exists=True),
help="A comma-separated two-columned text file containing location code preferences for "
"stations in the form: 'NET.STA, LOC'. Note that location code preferences need not "
"be provided for all stations -- the default is None. This approach allows for "
@@ -439,6 +439,20 @@ def get_loccha(cha1, cha2):
"for a given station-pair. In other words, stacks over "
"'interval-seconds' are in turn stacked to produce a "
"single cross-correlation function")
@click.option('--subset-stack', is_flag=True,
help="Outputs a number of stacks of cross-correlation subsets as identified based on the presence "
"or absence of azimuthal earthquake energy within cross-correlation windows, as outlined in "
"Hejrani et al. (in prep.). Note that this option is not compatible with --ensemble-stack and "
"--stacking-interval-seconds. A file named subset_stack.conf is expected in the current working "
"folder, with appropriate colon-separated values for the keys below: "
"CMT_CATALOG_PATH: 'path/to/CMT/catalog'"
"SW_VMIN: 2"
"SW_VMAX: 5.5"
"DIST_MIN: 0"
"DIST_MAX: 999"
"EMAG_MIN: 6"
"EMAG_MAX: 999"
"AZ_TOL: 30")
@click.option('--restart', default=False, is_flag=True, help='Restart job')
@click.option('--dry-run', default=False, is_flag=True, help='Dry run for printing out station-pairs and '
'additional stats.')
@@ -451,7 +465,7 @@ def main(data_source1, data_source2, output_path, window_seconds, window_overlap
start_time, end_time, instrument_response_inventory, instrument_response_output, water_level,
clip_to_2std, whitening, whitening_window_frequency, one_bit_normalize, location_preferences,
ds1_zchan, ds1_nchan, ds1_echan, ds2_zchan, ds2_nchan, ds2_echan, corr_chan, envelope_normalize,
ensemble_stack, restart, dry_run, no_tracking_tag, scratch_folder):
ensemble_stack, subset_stack, restart, dry_run, no_tracking_tag, scratch_folder):
"""
DATA_SOURCE1: Text file containing paths to ASDF files \n
DATA_SOURCE2: Text file containing paths to ASDF files \n
@@ -476,11 +490,16 @@ def main(data_source1, data_source2, output_path, window_seconds, window_overlap
if(stacking_interval_seconds is not None):
if(stacking_interval_seconds < window_seconds):
raise ValueError('Invalid value for --stacking-interval-seconds, must be > WINDOW_SECONDS')
if (stacking_interval_seconds > window_seconds * read_ahead_windows):
if(stacking_interval_seconds > window_seconds * read_ahead_windows):
raise ValueError("""Invalid value for --stacking-interval-seconds, \
must be < WINDOW_SECONDS*READ_AHEAD_WINDOWS""")
# end if

if(subset_stack): raise ValueError('Subset-stacking based on a GCMT catalog is incompatible with stacking over '
'fixed intervals, e.g. over 24 hrs, as done through --stacking-interval-seconds')
# end if
if(ensemble_stack and subset_stack):
raise ValueError('Ensemble-stacking and subset-stacking are mutually exclusive options')
# end if

#######################################################
# Compute amount of data to be read in in each IO call,
@@ -489,7 +508,7 @@ def main(data_source1, data_source2, output_path, window_seconds, window_overlap
#######################################################
interval_seconds = None
read_ahead_window_seconds = None
apply_stacking = None
apply_simple_stacking = False
if(stacking_interval_seconds is None):
if(ensemble_stack):
raise ValueError('--ensemble-stack is only applicable with --stacking-interval-seconds. Aborting..')
@@ -500,12 +519,13 @@ def main(data_source1, data_source2, output_path, window_seconds, window_overlap
window_seconds * window_buffer_length * 2 + \
window_overlap * window_seconds * 2
interval_seconds = read_ahead_window_seconds
apply_stacking = False
apply_simple_stacking = False

#print(read_ahead_window_seconds)
else:
read_ahead_window_seconds = window_seconds * read_ahead_windows
interval_seconds = stacking_interval_seconds
apply_stacking = True
apply_simple_stacking = True
# end if

process(data_source1, data_source2, output_path, interval_seconds, window_seconds, window_overlap,
@@ -514,7 +534,8 @@ def main(data_source1, data_source2, output_path, window_seconds, window_overlap
start_time, end_time, instrument_response_inventory, instrument_response_output, water_level,
clip_to_2std, whitening, whitening_window_frequency, one_bit_normalize, location_preferences,
ds1_zchan, ds1_nchan, ds1_echan, ds2_zchan, ds2_nchan, ds2_echan, corr_chan, envelope_normalize,
ensemble_stack, apply_stacking, restart, dry_run, no_tracking_tag, scratch_folder)
ensemble_stack, subset_stack, apply_simple_stacking, restart, dry_run, no_tracking_tag,
scratch_folder)
# end func

if __name__ == '__main__':