# Compute the rates for all source-zones, and assign nominal 'individual event' rates
# to each tsunami event in the synthetic catalogue.
#
# Note the 'individual event' rates are not by themselves particularly
# meaningful [they simply involve distributing the rate for each Mw value over
# all events with that Mw]. For example, the nominal 'individual event' rates
# are inversely related to the number of events in the magnitude category, which is
# somewhat arbitrary [especially for stochastic slip].
#
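# A minimal sketch (illustrative only, not executed) of the 'individual event'
# rate idea. The exceedance_rate() curve and all numbers below are
# hypothetical; the real calculation appears later via mw_rate_function.
if(FALSE){
    exceedance_rate = function(Mw) 10**(4 - 1*Mw) # Gutenberg-Richter, a=4, b=1
    Mw = 9.0; dMw = 0.1; n_events_with_this_Mw = 50
    bin_rate = exceedance_rate(Mw - dMw/2) - exceedance_rate(Mw + dMw/2)
    nominal_event_rate = bin_rate / n_events_with_this_Mw
    # Doubling n_events_with_this_Mw would halve nominal_event_rate, which is
    # why the individual rates are not meaningful in isolation
}
#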
library(rptha)
config = new.env()
source('config.R', local=config)
gcmt_access = new.env()
source('gcmt_subsetter.R', local=gcmt_access)
if(config$edge_correct_event_rates){
edge_correct = new.env()
source('back_calculate_convergence.R', local=edge_correct)
}
#
# INPUTS
#
sourcezone_parameter_file = config$sourcezone_parameter_file
sourcezone_parameters = read.csv(sourcezone_parameter_file, stringsAsFactors=FALSE)
# Quick check on the input file
source('check_sourcezone_parameters.R', local=TRUE)
check_sourcezone_parameter_row_weights(sourcezone_parameters)
# Get the source-name. Segmented cases have an extra 'segment name' that
# distinguishes them
source_segment_names = paste0(sourcezone_parameters$sourcename,
sourcezone_parameters$segment_name)
# Never allow Mw_max to be greater than this
MAXIMUM_ALLOWED_MW_MAX = config$MAXIMUM_ALLOWED_MW_MAX # 9.8
MAXIMUM_ALLOWED_MW_MAX_NORMAL = config$MAXIMUM_ALLOWED_MW_MAX_NORMAL
# Never allow Mw_max to be less than this
MINIMUM_ALLOWED_MW_MAX = config$MINIMUM_ALLOWED_MW_MAX # 7.65
# Increment between Mw values in the earthquake_events table. We will check
# that the table holds the same value
dMw = config$dMw # 0.1
# Only assign non-zero probability to earthquakes with Mw greater than this
MW_MIN = config$MW_MIN #
# We ensure that (Mw_max >= maximum_observed_mw + mw_observed_perturbation)
# This ensures that no logic-tree curve assigns zero probability to the largest
# observed event
mw_observed_perturbation = config$mw_observed_perturbation #0.05
# Truncated or 'characteristic' Gutenberg Richter model
Mw_frequency_dists = config$Mw_frequency_distribution_types #c('truncated_gutenberg_richter', 'characteristic_gutenberg_richter')
Mw_frequency_dists_p = config$Mw_frequency_distribution_weights #c(0.7, 0.3)
# Interpolate logic-tree parameter variation over this many values, all assumed to have
# equal rate. For example, if we provide source_coupling = c(0.1, 0.2, 0.7), then the
# actual coupling values will be "approx(source_coupling, n=nbins)$y"
nbins = config$logic_tree_parameter_subsampling_factor # 9
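# For example (illustrative, not executed), with nbins = 9:
if(FALSE){
    approx(c(0.1, 0.2, 0.7), n=9)$y
    # gives 0.100 0.125 0.150 0.175 0.200 0.325 0.450 0.575 0.700, i.e. linear
    # interpolation between the provided min/preferred/max values
}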
#
# END INPUTS
#
# Currently the code only treats pure normal or pure thrust events
stopifnot(all(sourcezone_parameters$rake[!is.na(sourcezone_parameters$rake)] %in% c(-90, 90)))
# Get environment we can use to provide spatially variable convergence
# information
source('make_spatially_variable_source_zone_convergence_rates.R')
bird2003_env = event_conditional_probability_bird2003_factory(
return_environment=TRUE)
#' Function to evaluate the rates for a given source-zone. This function returns
#' an environment, so we have easy access to key variables.
#'
#' This is a 'wrapper' function which uses a low-memory-demand "mock" if the
#' sourcezone_parameters_row$row_weight == 0. This saves time/memory when source-zones
#' have been replaced with others using the 'row_weight=0' approach.
#'
#' @param sourcezone_parameters_row data.frame a single row from sourcezone_parameters table
#' @param unsegmented_edge_rate_multiplier a constant, by which we increase the
#' conditional probability of ruptures on the edge of the source-zone. Used to
#' make the spatial distribution of moment release on the source better
#' approximate the desired value. If NULL, it is computed, or ignored (see
#' also config$edge_correct_event_rates)
#' @return the function environment
#'
source_rate_environment_fun<-function(sourcezone_parameters_row, unsegmented_edge_rate_multiplier=NULL){
# Self-assignment forces evaluation of the arguments, ensuring they are bound
# in this wrapper's environment before we branch below
sourcezone_parameters_row = sourcezone_parameters_row
unsegmented_edge_rate_multiplier = unsegmented_edge_rate_multiplier
if(as.numeric(sourcezone_parameters_row$row_weight) == 0){
# Shortcut for the case where the rate will always be zero!
out_env = source_rate_environment_fun_row_weight_zero(sourcezone_parameters_row, unsegmented_edge_rate_multiplier)
}else{
# Regular case
out_env = source_rate_environment_fun_standard(sourcezone_parameters_row, unsegmented_edge_rate_multiplier)
}
return(out_env)
}
#' Function to evaluate the rates for a given source-zone. This function returns
#' its environment, so we have easy access to key variables.
#'
#' This is the main workhorse function, used when (row_weight != 0)
#'
#' @param sourcezone_parameters_row data.frame a single row from sourcezone_parameters table
#' @param unsegmented_edge_rate_multiplier constant by which we increase the
#' conditional probability of ruptures on the edge of the source-zone. Used to
#' make the spatial distribution of moment release on the source better
#' approximate the desired value. If NULL, it is computed, or ignored (see
#' also config$edge_correct_event_rates)
#' @return the function environment
#'
source_rate_environment_fun_standard<-function(sourcezone_parameters_row, unsegmented_edge_rate_multiplier=NULL){
#
# PART 1
# Read parameters, basic datasets, etc
#
# Store key parameters for easy write-out later
sourcepar = list()
sourcepar$sourcezone_parameters_row = sourcezone_parameters_row
source_name = sourcezone_parameters_row$sourcename
sourcepar$name = source_name
# We might be on a specific segment
segment_name = sourcezone_parameters_row$segment_name
source_segment_name = paste0(source_name, segment_name)
# If the input segment_name is blank, then we are on a 'full-source-zone'. For some
# operations we need to be careful if this is not the case
is_a_segment = (sourcezone_parameters_row$segment_name != '')
# Check this is right
if(is_a_segment){
if(sum(sourcezone_parameters$sourcename == sourcezone_parameters_row$sourcename)<=1){
msg = paste0('Segment name provided on source with only one entry on ',
'sourcezone_parameters. \n In this case the segment name should be blank,',
' or there should be another segment in the table. Check ', source_segment_name)
stop(msg)
}
}else{
if(sum(sourcezone_parameters$sourcename == sourcezone_parameters_row$sourcename &
(sourcezone_parameters$segment_name == '')) != 1){
msg = paste0('Source appears more than once with blank segment names. Check ',
source_segment_name)
stop(msg)
}
}
# Find the lower/upper alongstrike numbers for this segment. If missing, we
# assume the 'segment' is actually the entire source-zone
alongstrike_lower = as.numeric(
sourcezone_parameters_row$segment_boundary_alongstrike_index_lower)
if(is.na(alongstrike_lower)){
alongstrike_lower = 1
stopifnot(!is_a_segment)
}
alongstrike_upper = as.numeric(
sourcezone_parameters_row$segment_boundary_alongstrike_index_upper)
if(is.na(alongstrike_upper)){
alongstrike_upper = Inf
stopifnot(!is_a_segment)
}
stopifnot(alongstrike_lower < alongstrike_upper)
# Get the CMT data in this segment
target_rake = bird2003_env$unit_source_tables[[source_name]]$rake[1]
stopifnot(target_rake %in% c(-90, 90))
stopifnot(all(bird2003_env$unit_source_tables[[source_name]]$rake == target_rake))
# Get GCMT data in this source-zone, if it is 'near-enough' to pure thrust or normal
# Many default parameters controlling which events we select are defined in config.R.
gcmt_data = gcmt_access$get_gcmt_events_in_poly(source_name,
alongstrike_index_min=alongstrike_lower,
alongstrike_index_max=alongstrike_upper,
target_rake_value=target_rake,
filter_by_strike = (target_rake != -90), # For normal fault sources, allow the strike to go either way (like outer rise events)
unit_source_table = bird2003_env$unit_source_tables[[source_name]])
# Get a vector which is true/false depending on whether each unit-source is
# inside this particular segment. (Not to be confused with 'is_a_segment', which is a scalar logical,
# telling us if the current source is just a segment of a source-zone)
is_in_segment =
((bird2003_env$unit_source_tables[[source_name]]$alongstrike_number >= alongstrike_lower) &
(bird2003_env$unit_source_tables[[source_name]]$alongstrike_number <= alongstrike_upper))
which_is_in_segment = which(is_in_segment)
# Source area
unit_source_areas = bird2003_env$unit_source_tables[[source_name]]$length *
bird2003_env$unit_source_tables[[source_name]]$width
sourcepar$area = sum(unit_source_areas)
sourcepar$area_in_segment = sum(unit_source_areas*is_in_segment)
#
# Gutenberg Richter b-value
#
sourcepar$b = sourcezone_parameters_row[1, c('bmin', 'bpref', 'bmax')]
sourcepar$b = approx(as.numeric(sourcepar$b), n=nbins*config$b_subsampling_increase)$y
sourcepar$b_p = rep(1, length(sourcepar$b))/length(sourcepar$b)
#
# Coupling
#
if(config$coupling_prior_type == 'uniform'){
sourcepar$coupling = config$uniform_coupling_prior
if(any(sourcepar$coupling == 0)) stop('Cannot treat coupling of zero, instead set prob_Mmax_below_Mmin > 0')
# Log spacing to equally resolve all coupling values in a relative sense
# However, we assign probabilities consistent with a uniform distribution
# (the log spacing is to improve numerical aspects of the discretization,
# we are not claiming that log(coupling) is uniformly distributed!)
sourcepar$coupling = exp(approx(log(as.numeric(sourcepar$coupling)),
n=nbins*config$coupling_subsampling_increase)$y)
# Discretize the probabilities as a uniform distribution (NOT log-uniform!)
# This choice will ensure the numerical derivatives are constant (i.e. uniform density)
# On the other hand, the discretization will lead to a small numerical difference between
# the mean coupling, and the mean of a uniform distribution -- although this 'converges away' in
# the limit of nbins-->Inf
bin_size = sourcepar$coupling
sourcepar$coupling_p = bin_size/sum(bin_size)
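# Illustrative check (not executed) of the discretization above: for
# log-spaced points the local bin width is proportional to the point value,
# so weights proportional to the values approximate a uniform density
if(FALSE){
    cv = exp(approx(log(c(0.1, 0.7)), n=9)$y)
    diff(cv)/cv[-length(cv)] # constant ratio --> bin width proportional to cv
}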
}else if(config$coupling_prior_type == 'spreadsheet'){
# Just use the spreadsheet values
sourcepar$coupling = sourcezone_parameters_row[1, c('cmin', 'cpref', 'cmax')]
sourcepar$coupling_p = rep(1, length(sourcepar$coupling))
sourcepar$coupling_p = sourcepar$coupling_p / sum(sourcepar$coupling_p)
}else if(config$coupling_prior_type == 'spreadsheet_and_uniform_50_50'){
# 50% on spreadsheet values, 50% on uniform prior
pr1 = config$uniform_coupling_prior
pr2 = sourcezone_parameters_row[1, c('cmin', 'cpref', 'cmax')]
# Empirical CDF for the uniform coupling prior
uniform_ecdf = approxfun(pr1, seq(0, 1, len=length(pr1)), rule=2)
spreadsheet_ecdf = approxfun(pr2, seq(0, 1, len=length(pr2)), rule=2)
final_ecdf<-function(x) 0.5*(uniform_ecdf(x) + spreadsheet_ecdf(x))
# Log spacing puts the discretization points closer together where the coupling values are small
range_c = range(c(pr1, pr2))
coupling_vals = exp(approx(log(range_c), n=nbins*config$coupling_subsampling_increase)$y)
ll = length(coupling_vals)
# Use numerical derivative to get the density
coupling_forward = 0.5*(coupling_vals + c(coupling_vals[-1], 2*coupling_vals[ll] - coupling_vals[ll-1]))
coupling_backward = 0.5*(coupling_vals + c(2*coupling_vals[1] - coupling_vals[2], coupling_vals[1:(ll-1)]))
coupling_p = final_ecdf(coupling_forward) - final_ecdf(coupling_backward)
coupling_p = coupling_p / sum(coupling_p)
sourcepar$coupling = coupling_vals
sourcepar$coupling_p = coupling_p
}else{
stop('unrecognized coupling prior type')
}
# We may have put some weight on 'aseismic' behaviour in the magnitude range of interest
sourcepar$prob_Mmax_below_Mmin = as.numeric(sourcezone_parameters_row$prob_Mmax_below_Mmin)
if(sourcepar$prob_Mmax_below_Mmin > 0){
# We have a certain probability of '0' coupling.
# Append that as an additional bin
p0 = sourcepar$prob_Mmax_below_Mmin
stopifnot(p0 > 0 & p0 <= 1)
sourcepar$coupling = c(0, sourcepar$coupling)
sourcepar$coupling_p = c(p0, sourcepar$coupling_p * (1-p0))
}
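# e.g. (illustrative numbers): with p0 = 0.1 and a prior coupling_p of
# c(0.5, 0.5), the result is coupling_p = c(0.1, 0.45, 0.45), with the first
# entry corresponding to coupling = 0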
#
# Mw_max
#
min_mw_max = max(
sourcezone_parameters_row$mw_max_observed + mw_observed_perturbation,
MINIMUM_ALLOWED_MW_MAX)
# Upper bound mw-max --> scaling relation, with area at -1 standard deviation
max_mw_max_strasser_area = Mw_2_rupture_size_inverse(sourcepar$area_in_segment,
relation = sourcezone_parameters_row$scaling_relation, CI_sd=-1)
# Alternative upper bound mw-max --> scaling relation, with width at -2 standard deviation
# This is a good idea from a practical perspective, because I generate variable-uniform
# and stochastic slip events by simulating width/length within +-2 SD limits. If width
# cannot accommodate this, then it is truncated (and in 50% of cases, length
# is expanded commensurately, while in the other 50% of cases, length is
# unchanged). If I allow Mw_max such that the source-zone width is < 2 standard deviations
# below the scaling value, then I will end up having very small area earthquakes in the
# cases where width is truncated, but length is not expanded. On some
# source-zones, this can lead to 100s of metres of mean slip even for e.g.
# Mw 9.1 earthquakes (say on Manus which is only 1-unit-source down-dip, but long enough
# to have an area-based Mw_max of 9.15).
sourcezone_widths = aggregate(
bird2003_env$unit_source_tables[[source_name]]$width[which_is_in_segment],
by=list(bird2003_env$unit_source_tables[[source_name]]$alongstrike_number[which_is_in_segment]),
sum)
mean_sourcezone_width = mean(sourcezone_widths$x)
max_mw_max_strasser_width = uniroot(
f<-function(x){
output = Mw_2_rupture_size(x, relation=sourcezone_parameters_row$scaling_relation,
detailed=TRUE, CI_sd=2)$minus_CI['width'] - mean_sourcezone_width
return(output)
},
interval=c(2, 20), # Mw must be between these bounds!
tol=1e-10
)$root
max_mw_max_strasser = min(max_mw_max_strasser_area, max_mw_max_strasser_width)
if(max_mw_max_strasser > min_mw_max){
sourcepar$Mw_max = c(
# Largest observed plus a small value,
min_mw_max,
# Upper mw [Strasser - 1SD area, OR Strasser -2SD width]
max_mw_max_strasser)
}else{
stop(paste0('Scaling relation mw_max < max observed @', source_segment_name))
}
# Ensure ordered
sourcepar$Mw_max = sort(sourcepar$Mw_max)
# Check it is correctly ordered (of course!)
stopifnot(all(diff(sourcepar$Mw_max) > 0))
# Ensure all Mw_max values meet our constraints
sourcepar$Mw_max = pmax(sourcepar$Mw_max, min_mw_max)
# Interpolate
sourcepar$Mw_max = approx(sourcepar$Mw_max, n=nbins*config$mwmax_subsampling_increase)$y
# Assign equal probabilities to all
sourcepar$Mw_max_p = rep(1, length(sourcepar$Mw_max) )/length(sourcepar$Mw_max)
# Clip AFTER interpolation
if(target_rake == -90){
sourcepar$Mw_max = pmin(sourcepar$Mw_max, MAXIMUM_ALLOWED_MW_MAX_NORMAL)
}else{
sourcepar$Mw_max = pmin(sourcepar$Mw_max, MAXIMUM_ALLOWED_MW_MAX)
}
#
# PART 2.
# Tectonic convergence rate
#
if(sourcezone_parameters_row$use_bird_convergence == 1){
#
# Idea: If plate convergence vector is between
# "+-config$rake_deviation" of pure thrust (or normal),
# then use the raw vector. Otherwise, project it onto the nearest
# within that range
#
# This is consistent with our use of data, which extracts earthquakes
# with rake being within some deviation of pure thrust (or normal)
#
#
# FIXME: The logic below only works for pure thrust (rake = 90), or pure
# normal (rake = -90). This is OK for PTHA18 but might need to be generalised in future
stopifnot(all(bird2003_env$unit_source_tables[[source_name]]$rake %in% c(-90, 90)))
if(bird2003_env$unit_source_tables[[source_name]]$rake[1] == -90){
# Normal -- in this case, positive 'vel_div' values contribute to the seismic moment
div_vec = pmax(0, bird2003_env$unit_source_tables[[source_name]]$bird_vel_div) * is_in_segment
}else{
# Thrust -- in this case, negative 'vel_div' values contribute to the seismic moment
div_vec = pmax(0, -bird2003_env$unit_source_tables[[source_name]]$bird_vel_div) * is_in_segment
}
if(max(div_vec) <= 0) stop(paste0('No tectonic moment on source ', source_name, ' -- suggests an input bug'))
# Shorthand for the divergent (or convergent) and right-lateral velocity.
# NOTE: We zero velocities that are not on this segment. Also, by taking the
# sign-appropriate positive part of the divergent velocity above, we compute
# positive tectonic moment for both normal and thrust events.
bvrl = bird2003_env$unit_source_tables[[source_name]]$bird_vel_rl * is_in_segment
# Limit lateral component of motion that we consider, based on the permitted rake
# deviation from pure thrust
deg2rad = pi/180
allowed_rake_deviation_radians = config$rake_deviation * deg2rad
rl_vec = sign(bvrl) * pmin(abs(bvrl), div_vec*tan(allowed_rake_deviation_radians))
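# Worked example (hypothetical numbers): with config$rake_deviation = 50
# degrees and div_vec = 10 mm/yr on a unit source, at most
# 10*tan(50*pi/180) ~= 11.9 mm/yr of lateral motion is retained in rl_vec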
# NOTE: If we have segmentation, then this source-zone averaged slip
# value will be a localised value.
sourcepar$slip = weighted.mean(
# Convergent slip
x = sqrt(div_vec**2 + rl_vec**2),
# Weighted by area of unit-sources in the segment
w = unit_source_areas * is_in_segment)
}else{
# Using a constant convergence rate everywhere
sourcepar$slip = as.numeric(sourcezone_parameters_row$tectonic_slip)*
as.numeric(sourcezone_parameters_row$convergent_fraction)
div_vec = rep(sourcepar$slip, length(unit_source_areas)) * is_in_segment
}
# Account for non-zero dip, and convert from mm/year to m/year -- only
# averaging over unit-sources in the segment.
mean_dip = mean_angle(bird2003_env$unit_source_tables[[source_name]]$dip[which(is_in_segment == 1)])
sourcepar$mean_dip = mean_dip
deg2rad = pi/180
cos_dip = cos(mean_dip*deg2rad)
sourcepar$cos_dip = cos_dip
sourcepar$slip = sourcepar$slip/cos_dip * 1/1000
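# Worked example (hypothetical numbers): with mean_dip = 20 degrees and a
# convergence of 50 mm/year, the down-dip slip rate is
# 50/cos(20*pi/180) * 1/1000 ~= 0.0532 m/year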
#
# Get the event table. We need to pass this to the rate function (so it can
# moment balance)
#
event_table_file = paste0('../SOURCE_ZONES/', source_name,
'/TSUNAMI_EVENTS/all_uniform_slip_earthquake_events_',
source_name, '.nc')
event_table = read_table_from_netcdf(event_table_file)
# Round away any finite-precision issues in the netcdf file
# Our increments are by dMw
event_table$Mw = round(event_table$Mw, 3)
stopifnot( all(event_table$Mw == sort(event_table$Mw)) )
# Check we didn't destroy the table by rounding!
stopifnot( all( (diff(event_table$Mw) == 0) | ( abs(diff(event_table$Mw) - dMw) < 1.0e-12 ) ) )
#
# Treatment of variable shear modulus (mathematically, it's like a 'magnitude-observation-error')
#
include_variable_shear_modulus = (target_rake == 90)
if(include_variable_shear_modulus){
# Treatment of variable shear modulus
#
# Here we compute the conditional cumulative distribution function of
# the magnitude-difference (i.e. between the 'variable shear modulus'
# magnitude, and the 'fixed shear modulus' magnitude). It is
# conditional on the 'fixed shear modulus' magnitude. By treating these
# differences as 'errors in observed magnitudes', we get a convenient
# method of treating variable shear modulus in the analysis.
#
# WE USE HETEROGENEOUS SLIP MODELS TO COMPUTE THE magnitude-difference
# CDF IN ALL CASES!
# Why? The empirical CDF of magnitude-difference will actually differ
# depending on whether we compute the function using
# uniform/variable-uniform/heterogeneous slip scenarios.
# However, experimentation suggests the impact of this variation on our
# rate models is small. Heuristically, this is because the CDF affects
# the 'observed magnitude rates' via a convolution integral, so
# deviations are 'averaged out' in that process.
# It is much simpler in terms of our workflow to use a single model, so
# that is implemented here
# The empirical CDF based on uniform-slip-with-fixed-size has
# clearer 'discretization' artefacts due to reduced variability. So we
# use stochastic slip to derive the empirical CDF, as the large number
# of events + natural variability leads to a nicely behaved function.
#
stochastic_slip_event_table_file = paste0('../SOURCE_ZONES/', source_name,
'/TSUNAMI_EVENTS/all_stochastic_slip_earthquake_events_', source_name, '.nc')
stochastic_slip_fid = nc_open(stochastic_slip_event_table_file)
stoc_mw_mu_constant = ncvar_get(stochastic_slip_fid, 'Mw')
stoc_mw_mu_constant = round(stoc_mw_mu_constant, 3) # Address imperfect floating point storage in netcdf
stoc_mw_mu_variable = ncvar_get(stochastic_slip_fid, 'variable_mu_Mw')
# Difference between variable vs uniform shear modulus
mw_obs_deviation = stoc_mw_mu_variable - stoc_mw_mu_constant
# Sanity check (specific to our case)
stopifnot( (min(mw_obs_deviation) > -0.32) & (max(mw_obs_deviation) < 0.25) )
# If we have segmentation, then we should only look at events which touch the segment
to_keep = rep(TRUE, length(stoc_mw_mu_variable))
if(is_a_segment){
# Read the event indices, and identify events that do not touch the current segment
stoc_eis = ncvar_get(stochastic_slip_fid, 'event_index_string')
stoc_eis = sapply(stoc_eis, f<-function(x) as.numeric(strsplit(x, '-')[[1]]), simplify=FALSE)
for(ei in 1:length(stoc_mw_mu_variable)){
inds = stoc_eis[[ei]]
# Record events that are not in the current segment.
if(!any(is_in_segment[inds])) to_keep[ei] = FALSE
}
# Save memory
rm(stoc_eis)
}
nc_close(stochastic_slip_fid); rm(stochastic_slip_fid)
# Restrict CDF based on events in the segment
keepers = which(to_keep)
if(length(keepers) == 0) stop('Error: No events in segment. This suggests a bug')
mw_obs_deviation = mw_obs_deviation[keepers]
stoc_mw_mu_constant = stoc_mw_mu_constant[keepers]
# Conditional empirical CDF of difference between 'Mw observation' and
# 'Mw with constant shear modulus'
mw_deviation_cdf_variable_shear_modulus = make_conditional_ecdf(mw_obs_deviation,
stoc_mw_mu_constant)
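# Conceptual sketch (not executed) of what a conditional ECDF involves.
# NOTE: this is not necessarily how make_conditional_ecdf is implemented,
# and the magnitude binning below is a hypothetical choice.
if(FALSE){
    bins = split(mw_obs_deviation, cut(stoc_mw_mu_constant, seq(7, 10, by=0.1)))
    bins = bins[sapply(bins, length) > 0] # drop empty magnitude bins
    ecdfs = lapply(bins, ecdf) # one empirical CDF of deviations per Mw bin
}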
# Reduce memory
rm(mw_obs_deviation, stoc_mw_mu_constant, stoc_mw_mu_variable, keepers, to_keep); gc()
}else{
# For normal fault events, ignore shear modulus variations (often we only
# have one shear modulus model in that case)
mw_deviation_cdf_variable_shear_modulus = NULL
}
#
# PART 3
# Get a preliminary event conditional probability model; use it to compute
# the mw_rate_function, then update the event conditional probability model
# to deal with edge-effects (this will not affect the mw_rate_function)
#
#
# Event conditional probabilities
#
if(sourcezone_parameters_row$use_bird_convergence == 1){
# Make a function which uses Bird's spatially variable convergence to
# assign conditional probabilities
conditional_probability_model =
bird2003_env$make_conditional_probability_function_uniform_slip(
source_name, is_in_segment)
}else{
## conditional_probability_model = 'inverse_slip'
# Make a conditional probability model like 'inverse_slip', but which
# considers whether the event is in the segment
conditional_probability_model<-function(event_table_with_fixed_Mw){
# Note for uniform-slip models with fixed rigidity, inverse-slip is
# proportional to area (with magnitude fixed).
# That is the case treated here (notwithstanding perturbations to
# deal with variable slip or rigidity).
slip_inv = 1/event_table_with_fixed_Mw$slip
# Find the fraction of the unit sources in each event which are
# inside our segment
fraction_in_segment = slip_inv * 0.0
for(i in 1:nrow(event_table_with_fixed_Mw)){
unit_source_indices = get_unit_source_indices_in_event(
event_table_with_fixed_Mw[i,])
fraction_in_segment[i] = mean(is_in_segment[unit_source_indices])
}
# The conditional probability weight will fall entirely on events that
# touch the segment. Events that are only partially contained will
# be downweighted accordingly
output = slip_inv * fraction_in_segment
output = output/sum(output)
return(output)
}
}
# NOTE: These conditional probabilities may be updated to deal with 'edge-effects'
# further in the code. The aim is to make the integrated slip more consistent with
# moment conservation -- whereas the current approach tends to concentrate moment
# release more towards the centre of the rupture, because by construction we have less
# events touching unit-sources at the source-zone along-strike extremes
# than we have events touching the middle of the source-zone.
event_conditional_probabilities = get_event_probabilities_conditional_on_Mw(
event_table,
conditional_probability_model = conditional_probability_model)
#
# Get earthquake magnitude data for rate function update
#
if(nrow(gcmt_data) > 0){
gcmt_data_for_rate_function = list(
# Magnitude
Mw = gcmt_data$Mw,
# Time since the start of observation, in years
#t = (gcmt_data$julianDay1900 -
# gcmt_access$cmt_start_time_julianDay1900)/gcmt_access$days_in_year
t = NULL # Censored likelihood is biased for rates, better to use poisson count approach.
)
if(min(sourcepar$coupling) == 0){
stop('Cannot have zero coupling logic tree branch when GCMT data is present')
}
}else{
gcmt_data_for_rate_function = list(Mw = NULL, t = NULL)
}
# Copy the global MW_MIN into this function's environment (returned below)
MW_MIN = MW_MIN
#
# Build rate function
#
mw_rate_function = rate_of_earthquakes_greater_than_Mw_function(
slip_rate = as.numeric(sourcepar$slip * sourcepar$coupling),
slip_rate_prob = as.numeric(sourcepar$coupling_p),
b = as.numeric(sourcepar$b),
b_prob = sourcepar$b_p,
Mw_min = MW_MIN,
Mw_min_prob = 1,
Mw_max = as.numeric(sourcepar$Mw_max),
Mw_max_prob = sourcepar$Mw_max_p,
# Need to pass the local segment area here
sourcezone_total_area = sourcepar$area_in_segment,
# Note we pass the 'full' event table, but in segments some of these
# events will have conditional probability of zero
event_table = event_table,
event_conditional_probabilities = event_conditional_probabilities,
computational_increment = 0.02,
Mw_frequency_distribution = Mw_frequency_dists,
Mw_frequency_distribution_prob = Mw_frequency_dists_p,
update_logic_tree_weights_with_data=config$update_logic_tree_weights_with_data,
Mw_count_duration = c(gcmt_access$mw_threshold, nrow(gcmt_data),
gcmt_access$cmt_duration_years),
Mw_obs_data = gcmt_data_for_rate_function,
account_for_moment_below_mwmin = TRUE,
mw_observation_error_cdf = mw_deviation_cdf_variable_shear_modulus
)
# Nearly finished .....
#
# Optionally adjust the conditional probability of the events, to better satisfy spatial
# variations in seismic moment conservation
#
if(config$edge_correct_event_rates){
#
# Derive new event conditional probabilities which better represent
# spatial variations in tectonic slip. We do this by inflating the
# conditional probabilities of 'edge_events' (i.e. events which
# occur on the boundary of the unsegmented source-zone) by a factor 'edge_multiplier'.
#
# Here, we find an 'edge_multiplier' that well represents spatial
# variations in convergence.
#
fun_to_optimize<-function(edge_multiplier, return_conditional_probabilities=FALSE){
# Rates based on the 'edge_multiplier = 0' case
event_rates = event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2) -
mw_rate_function(event_table$Mw + dMw/2) )
# Compute spatially variable integrated slip, given the edge_multiplier argument
back_calculate_convergence_env = new.env()
back_calculate_convergence_env = edge_correct$back_calculate_convergence(
sourcename=source_name,
slip_type='uniform',
edge_multiplier=edge_multiplier,
uss = bird2003_env$unit_source_tables[[source_name]],
event_rates = event_rates,
event_Mw = event_table$Mw,
event_index_string = event_table$event_index_string,
slip = event_table$slip)
model = back_calculate_convergence_env$output$integrated_slip
#
# We want the shape of the convergent slip to be like 'div_vec'.
# Coupling might rescale this. Return a goodness of fit value that
# reflects that, unless we explicitly request to return the above
# function environment
#
if(!return_conditional_probabilities){
output = sum( is_in_segment*( model/sum(model) - div_vec/sum(div_vec) )**2 )
}else{
# This is useful once we know the best edge_multiplier
output = back_calculate_convergence_env
}
return(output)
}
if(!is_a_segment){
# Tests to weed out problematic cases
f1 = fun_to_optimize(0.0)
f2 = fun_to_optimize(10.0)
if(isTRUE(all.equal(f1, f2))){
# This should only happen if events on the boundary all have
# a-priori weight of zero, which should only happen for 'is_a_segment'
# cases
msg = paste0('Use of an edge multiplier is having no impact on source ',
source_segment_name)
stop(msg)
}else{
# Find the optimal edge_multiplier
best_edge_mult = optimize(fun_to_optimize, lower=0, upper=30)
# Check that results are not hitting these boundaries
if(best_edge_mult$minimum < 1.0e-03 | best_edge_mult$minimum > 29.999){
print(paste0('Check edge_multiplier on ', source_segment_name))
}
# Get the conditional probabilities from the 'best' edge_multiplier
best_conv_env = fun_to_optimize(best_edge_mult$minimum, return_conditional_probabilities=TRUE)
event_conditional_probabilities = best_conv_env$new_conditional_probability
rm(best_conv_env)
gc()
}
}else{
#
# We are on a segment. It might be hard to stably estimate the edge_multiplier, because
# the edge events might have little impact on the convergence on this source. Therefore,
# it seems better to use the edge_multiplier from the unsegmented source
if( is.null(unsegmented_edge_rate_multiplier) ){
msg = 'Must provide an unsegmented_edge_rate_multiplier on segments'
stop(msg)
}
best_edge_mult = list()
best_edge_mult$minimum = unsegmented_edge_rate_multiplier
if(best_edge_mult$minimum < 1.0e-03 | best_edge_mult$minimum > 29.999){
print(paste0('Check edge_multiplier on ', source_segment_name))
}
# Get the conditional probabilities from the 'best' edge_multiplier
best_conv_env = fun_to_optimize(best_edge_mult$minimum, return_conditional_probabilities=TRUE)
event_conditional_probabilities = best_conv_env$new_conditional_probability
rm(best_conv_env)
gc()
}
sourcepar$best_edge_multiplier = best_edge_mult
}
#
# Nominal rates on all events
#
event_rates = event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2) -
mw_rate_function(event_table$Mw + dMw/2) )
event_rates_mu_vary = event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, account_for_mw_obs_error=TRUE) -
mw_rate_function(event_table$Mw + dMw/2, account_for_mw_obs_error=TRUE) )
#
# Get the fraction of the logic-tree which places non-zero weight on the
# event being possible. This is nice for heuristically describing epistemic
# uncertainties.
#
# Note that since the event rates are evaluated as GR(Mw-dMw/2) - GR(Mw+dMw/2),
# it makes most sense to evaluate this term at Mw-dMw/2
#
# For segments, we need to be careful to not 'double-count' events which
# are in 2 segments. In that case the conditional probability model is
# partially weighted in each, and we should make sure the
# weight_with_nonzero_rate is as well
#
if(is_a_segment){
fraction_in_segment = sapply(event_table$event_index_string, f<-function(x){
inds = as.numeric(strsplit(x, '-')[[1]])
output = mean(is_in_segment[inds])
return(output)
})
}else{
fraction_in_segment = 1
}
event_weight_with_nonzero_rate = (event_conditional_probabilities > 0) *
fraction_in_segment *
mw_rate_function(event_table$Mw-dMw/2, epistemic_nonzero_weight=TRUE)
# As above with variable mu
event_weight_with_nonzero_rate_mu_vary = (event_conditional_probabilities > 0) *
fraction_in_segment *
mw_rate_function(event_table$Mw-dMw/2, epistemic_nonzero_weight=TRUE, account_for_mw_obs_error=TRUE)
#
# Quantiles below here
#
# Note that on source-zones with optional segmentation, we will have to change
# these quantiles later.
#
# The idea is that "for the segments", the uncertainties
# should be co-monotonic (i.e. if the 16th percentile is true on segmentA, then
# 16th percentile is also true on segmentB, etc). So the 16th percentile exceedance
# rate for 'all segments combined' can be obtained by adding their 16th percentiles.
#
# However, we will want to compute quantiles resulting from the "weighted sum of
# the unsegmented treatment, and the segmented treatment". For example, suppose
# we give 50% weight to the segmented option (all segments, co-monotonic), and
# 50% weight to the unsegmented option. In this case, the 16th percentile for the
# combination might not be based on combining the 16th percentile for each option
# separately. Instead, we might take the 32nd percentile from the segmented sources,
# and no weight from the unsegmented source (this would be correct if the 32nd percentile
# in the segmented case was smaller than the smallest unsegmented percentile). Or we might
# take the 20th percentile from the segmented models, and the 12th from unsegmented. The actual
# number will depend on the details of the source and segments.
#
#
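# Illustrative sketch (not executed, hypothetical numbers) of the
# co-monotonic combination for the segmented model described above:
if(FALSE){
    # 16th-percentile exceedance rates of some Mw on each segment
    seg_rate_16pc = c(2e-03, 1e-03, 5e-04)
    # Co-monotonic 16th percentile for all segments combined = their sum
    combined_16pc = sum(seg_rate_16pc)
}
#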
# Upper credible interval bound. Wrap in as.numeric to avoid having a 1
# column matrix as output
event_rates_upper = as.numeric(event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, quantiles=config$upper_ci_inv_quantile) -
mw_rate_function(event_table$Mw + dMw/2, quantiles=config$upper_ci_inv_quantile) )
)
event_rates_upper_mu_vary = as.numeric(event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, quantiles=config$upper_ci_inv_quantile,
account_for_mw_obs_error=TRUE) -
mw_rate_function(event_table$Mw + dMw/2, quantiles=config$upper_ci_inv_quantile,
account_for_mw_obs_error=TRUE) )
)
# Lower credible interval bound. Wrap in as.numeric to avoid having a 1
# column matrix as output
event_rates_lower = as.numeric(event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, quantiles=config$lower_ci_inv_quantile) -
mw_rate_function(event_table$Mw + dMw/2, quantiles=config$lower_ci_inv_quantile) )
)
event_rates_lower_mu_vary = as.numeric(event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, quantiles=config$lower_ci_inv_quantile,
account_for_mw_obs_error=TRUE) -
mw_rate_function(event_table$Mw + dMw/2, quantiles=config$lower_ci_inv_quantile,
account_for_mw_obs_error=TRUE) )
)
#
# Median
#
event_rates_median = as.numeric(event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, quantiles=0.5) -
mw_rate_function(event_table$Mw + dMw/2, quantiles=0.5) )
)
event_rates_median_mu_vary = as.numeric(event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, quantiles=0.5, account_for_mw_obs_error=TRUE) -
mw_rate_function(event_table$Mw + dMw/2, quantiles=0.5, account_for_mw_obs_error=TRUE) )
)
#
# 16pc quantile
#
event_rates_16pc = as.numeric(event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, quantiles=0.16) -
mw_rate_function(event_table$Mw + dMw/2, quantiles=0.16) )
)
event_rates_16pc_mu_vary = as.numeric(event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, quantiles=0.16, account_for_mw_obs_error=TRUE) -
mw_rate_function(event_table$Mw + dMw/2, quantiles=0.16, account_for_mw_obs_error=TRUE) )
)
#
# 84 pc quantile
#
event_rates_84pc = as.numeric(event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, quantiles=0.84) -
mw_rate_function(event_table$Mw + dMw/2, quantiles=0.84) )
)
event_rates_84pc_mu_vary = as.numeric(event_conditional_probabilities *
(mw_rate_function(event_table$Mw - dMw/2, quantiles=0.84, account_for_mw_obs_error=TRUE) -
mw_rate_function(event_table$Mw + dMw/2, quantiles=0.84, account_for_mw_obs_error=TRUE) )
)
gc()
return(environment())
}
#' Compute source-zone variables efficiently when row_weight=0
#'
#' To allow dropping/replacing source-zones cleanly, we allow row_weight=0. This means
#' that all events on the source-zone will have a rate=0, i.e. it will not contribute
#' to the final hazard.
#'
#' In that instance the call to source_rate_environment_fun is overly expensive
#' and demanding of memory. However, to run the code without too many changes, we really
#' need some variables to exist even when row_weight=0.
#'
#' This function makes those variables, efficiently, in the 'row_weight=0' case
#'
source_rate_environment_fun_row_weight_zero<-function(
sourcezone_parameters_row,
unsegmented_edge_rate_multiplier=NULL){
#
# PART 1
# Read parameters, basic datasets, etc
#
# Store key parameters for easy write-out later
sourcepar = list()
sourcepar$sourcezone_parameters_row = sourcezone_parameters_row
source_name = sourcezone_parameters_row$sourcename
sourcepar$name = source_name
# We might be on a specific segment
segment_name = sourcezone_parameters_row$segment_name
source_segment_name = paste0(source_name, segment_name)
# If the input segment_name is blank, then we are on a 'full-source-zone'. For some
# operations we need to be careful if this is not the case
is_a_segment = (sourcezone_parameters_row$segment_name != '')
# Rake
target_rake = bird2003_env$unit_source_tables[[source_name]]$rake[1]
# Edge multiplier
best_edge_mult = list()
best_edge_mult$minimum = 0
#
# Get the event table.
#
event_table_file = paste0('../SOURCE_ZONES/', source_name,
'/TSUNAMI_EVENTS/all_uniform_slip_earthquake_events_',
source_name, '.nc')
event_table = read_table_from_netcdf(event_table_file)
# Vector of zeros, reused as empty data elsewhere
empty_data = rep(0, length(event_table$Mw))
#
# 'ZERO' version of the variables that are needed later on,
# in order to 'cleanly' write zeros to the netcdf files.
#
event_rates = empty_data
event_weight_with_nonzero_rate = empty_data
event_rates_median = empty_data
event_rates_16pc = empty_data
event_rates_84pc = empty_data
event_rates_upper = empty_data
event_rates_lower = empty_data
event_rates_mu_vary = empty_data
event_weight_with_nonzero_rate_mu_vary = empty_data
event_rates_median_mu_vary = empty_data
event_rates_16pc_mu_vary = empty_data
event_rates_84pc_mu_vary = empty_data
event_rates_upper_mu_vary = empty_data
event_rates_lower_mu_vary = empty_data
# The above variables should allow the code to execute cleanly in all cases!
return(environment())
}
#' Suppose a source-zone has some weight on an unsegmented model, and the remaining
#' weight on a collection of segments.
#'
#' How should we compute percentiles (or credible-intervals) of the 'combined' Mw-frequency
#' curve (i.e. to characterise uncertainty)?
#'
#' The approach taken herein is:
#' A) Firstly take ALL the segmented sources. Their combined Mw-frequency curve
#' is developed, with percentiles based on assuming uncertainties are co-monotonic.
#'
#' For illustration, suppose we are interested in a percentile of the exceedance
#' rate of Mw 9 events (e.g. 16th percentile).
#' On the combined-segments, this is computed assuming the segments are 'co-monotonic'
#' (i.e. as the sum of the 16th percentile of the exceedance rates on each segment).
#'
#' B) Once (A) is completed, we have 2 models for the source-zone, one segmented and one unsegmented.
#' These will have been assigned row_weights which sum to 1.0 (e.g. 0.5 and 0.5 is common).
#' How to compute the 16th percentile (or any other) of the Mw 9.0 exceedance rate for this combination?
#' The solution is purely mathematical -- there is a 50% chance of segmented or unsegmented being true, and
#' so the 16th percentile should be taken from the 16th percentile of the combined distribution.
#' In practice, this might mean we evaluate the segmented models at their 32nd percentile and ignore
#' the unsegmented model (this would be correct if the 32nd segmented percentile was lower than the 0th
#' unsegmented percentile). Or maybe 22nd segmented and 10th unsegmented. It depends on the inputs, but the
#' answer can be calculated.
#'
#' This function does the above calculations, and creates new variables in the environment which
#' can map the 'rate percentiles' onto individual scenarios, which go into the netcdf files.
#'
#' @param all_sources a list of environments corresponding to all segmented and unsegmented sources on
#' a single source-zone. Each is an output of compute_rates_all_sources. The first one should be the
#' unsegmented source, followed by segments.
#' @param percentile_discretization small real number. The function works by numerically discretizing the mw-rate-function
#' at a range of logic-tree percentiles (i.e. inverse quantiles), with spacing = dp which is close to percentile_discretization,
#' while ensuring that 1/dp is an integer. The discretization sequence goes from dp/2 to 1-dp/2.
#' @param quick_exit_if_row_weights_all_zero Logical - Skip the expensive calculations if the row weights are all zero
update_scenario_rate_percentiles_on_source_zones_with_partial_segmentation<-function(all_sources,
percentile_discretization = 0.0025, quick_exit_if_row_weights_all_zero=TRUE){
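# Illustrative sketch (not executed; the numbers and qlnorm shapes below are
# hypothetical) of step (B) above, for 50/50 row weights: discretize each
# model's exceedance rate over logic-tree percentiles, pool the two with
# equal weight, then read the combined percentile off the pooled distribution
if(FALSE){
    dp = 0.0025
    p = seq(dp/2, 1 - dp/2, by=dp)
    rate_seg = 1e-03 * qlnorm(p, 0, 0.5) # segmented-model exceedance rates
    rate_unseg = 2e-03 * qlnorm(p, 0, 0.5) # unsegmented-model exceedance rates
    pooled = sort(c(rate_seg, rate_unseg))
    combined_16pc = quantile(pooled, 0.16)
}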