Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Colored noise #541

Merged
merged 7 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ dependencies:
- pytest
- pip:
- pyhht
- stochastic
- '-e .'
4 changes: 2 additions & 2 deletions pyleoclim/core/coherence.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,9 +595,9 @@ def signif_test(self, number=200, method='ar1sim', seed=None, qs=[0.95], setting

Number of surrogate series to create for significance testing. The default is 200.

method : {'ar1sim','phaseran'}, optional
method : {'ar1sim','phaseran','CN'}, optional

Method through which to generate the surrogate series. The default is 'ar1sim'.
Method through which to generate the surrogate series. The default is 'phaseran'.

seed : int, optional

Expand Down
14 changes: 8 additions & 6 deletions pyleoclim/core/psds.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,16 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95],
pyleoclim.core.series.Series.wavelet : Performs wavelet analysis on Pyleoclim Series

'''
from ..core.surrogateseries import SurrogateSeries
from ..core.surrogateseries import SurrogateSeries, supported_surrogates

if self.spec_method in ['wwz','lomb_scargle'] and method == 'ar1asym':
raise ValueError('Asymptotic solution is not supported for the wwz method')

supported_methods = ['CN', 'ar1sim', 'phaseran', 'ar1asym', 'uar1']
if method not in supported_methods:
raise ValueError(f"The available methods are {supported_methods}")

if method not in ['ar1sim', 'uar1','ar1asym']:
raise ValueError("The available methods are 'ar1sim', 'uar1' and 'ar1asym'")

if method in ['ar1sim', 'uar1']:
if method in supported_surrogates:
signif_scals = None
if scalogram:
try:
Expand Down Expand Up @@ -733,7 +734,8 @@ def plot(self, in_loglog=True, in_period=True, label=None, xlabel=None, ylabel='
signif_method_label = {
'ar1sim': 'AR(1) simulations (MoM)',
'uar1': 'AR(1) simulations (MLE)',
'ar1asym': 'AR(1) asymptotic solution'
'ar1asym': 'AR(1) asymptotic solution',
'CN': 'Colored Noise'
}
nqs = np.size(self.signif_qs.psd_list)

Expand Down
8 changes: 5 additions & 3 deletions pyleoclim/core/scalograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95],
Parameters
----------

method : {'ar1asym', 'ar1sim','uar1'}
method : {'ar1asym', 'ar1sim','uar1', 'CN'}

Method to use to generate the surrogates. ar1sim uses simulated timeseries with similar persistence.
ar1asym represents the theoretical, closed-form solution. The default is ar1sim
Expand Down Expand Up @@ -463,6 +463,8 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95],
pyleoclim.core.scalograms.MultipleScalogram : MultipleScalogram object

pyleoclim.utils.wavelet.tc_wave_signif : Asymptotic significance calculation

pyleoclim.core.surrogateseries.SurrogateSeries : surrogate series objects

Examples
--------
Expand All @@ -486,11 +488,11 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95],
if self.wave_method == 'wwz' and method == 'ar1asym':
raise ValueError('Asymptotic solution is not supported for the wwz method')

acceptable_methods = ['ar1sim', 'ar1asym', 'uar1']
acceptable_methods = ['ar1sim', 'ar1asym', 'uar1','CN']
if method not in acceptable_methods:
raise ValueError(f"The available methods are: {acceptable_methods}")

if method in ['ar1sim','uar1'] :
if method in ['ar1sim','uar1','CN']:
from ..core.surrogateseries import SurrogateSeries

if hasattr(self,'signif_scals'):
Expand Down
11 changes: 6 additions & 5 deletions pyleoclim/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3387,9 +3387,10 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method =

The significance of the correlation is assessed using one of the following methods:

1) 'ttest': T-test adjusted for effective sample size.
2) 'ar1sim': AR(1) modeling of x and y.
3) 'phaseran': phase randomization of original inputs. (default)
1) 'ttest': T-test adjusted for effective sample size, see [1]
2) 'ar1sim': AR(1) modeling of x and y (Monte Carlo method)
3) 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method)
3) 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2]
4) 'built-in': uses built-in method

Note: Up to version v0.14.0. ar1sim was called "isopersistent", phaseran was called "isospectral"
Expand Down Expand Up @@ -3467,9 +3468,9 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method =

References
----------
[1] Hu, J., J. Emile-Geay, and J. Partin (2017), Correlation-based interpretations of paleoclimate data – where statistics meet past climates, Earth and Planetary Science Letters, 459, 362–371, doi:10.1016/j.epsl.2016.11.048.
_ [1] Hu, J., J. Emile-Geay, and J. Partin (2017), Correlation-based interpretations of paleoclimate data – where statistics meet past climates, Earth and Planetary Science Letters, 459, 362–371, doi:10.1016/j.epsl.2016.11.048.

[2] Ebisuzaki, W. (1997), A method to estimate the statistical significance of a correla- tion when the data are serially correlated, Journal of Climate, 10(9), 2147–2153, doi:10.1175/1520- 0442(1997)010¡2147:AMTETS¿2.0.CO;2.
_ [2] Ebisuzaki, W. (1997), A method to estimate the statistical significance of a correla- tion when the data are serially correlated, Journal of Climate, 10(9), 2147–2153, doi:10.1175/1520- 0442(1997)010¡2147:AMTETS¿2.0.CO;2.

Examples
--------
Expand Down
57 changes: 42 additions & 15 deletions pyleoclim/core/surrogateseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import numpy as np
import warnings

supported_surrogates = frozenset(['ar1sim','phaseran', 'uar1']) # broadcast all supported surrogates as global variable, for exception handling
supported_surrogates = frozenset(['ar1sim','phaseran','uar1','CN']) # broadcast all supported surrogates as global variable, for exception handling

class SurrogateSeries(EnsembleSeries):
''' Object containing surrogate timeseries, obtained either by emulating an
Expand All @@ -22,7 +22,7 @@ class SurrogateSeries(EnsembleSeries):
'''
def __init__(self, series_list=[], number=1, method=None, label=None, param=None, seed=None):
'''
Initiatlize a SurrogateSeries object. The parameters below are then used for series generation and labeling.
Initialize a SurrogateSeries object. The parameters below are then used for series generation and labeling.

Parameters
----------
Expand All @@ -32,16 +32,26 @@ def __init__(self, series_list=[], number=1, method=None, label=None, param=None
number : int
The number of surrogates to generate. The default is 1.

method : str {ar1sim, phaseran, uar1}
method : str {ar1sim, phaseran, uar1, CN}
The name of the method used to generate surrogates of the timeseries (no default)
- 'ar1sim' fits an AR(1) model using the method of moments (MoM) as per [1]
- 'phaseran' applies phase randomization as per [2, 3]
- 'CN' fits a "colored noise" model (power-law spectrum) as per [4] (Eq 15)

param : list
a list of parameters for the method in question. Ignored if the method does not require parameters.

label : str
label of the collection of timeseries (e.g. 'SOI surrogates [AR(1) MLE]')
label of the collection of timeseries (e.g. 'SOI surrogates [AR(1) MoM]')
If not provided, defaults to "series surrogates [{method}]"


References
----------
_ [1] Mudelsee, M. (2002), TAUEST: a computer program for estimating persistence in unevenly spaced weather/climate time series, Computers and Geosciences, 28, 69–72, doi:10.1016/S0098- 3004(01)00041-3.
_ [2] Ebisuzaki, W. (1997), A method to estimate the statistical significance of a correlation when the data are serially correlated, Journal of Climate, 10(9), 2147–2153, doi:10.1175/1520- 0442(1997)010¡2147:AMTETS¿2.0.CO;2.
_ [3] Prichard, D., and J. Theiler (1994), Generating surrogate data for time series with several simultaneously measured variables, Phys. Rev. Lett., 73, 951–954, doi: 10.1103/PhysRevLett.73.951.
_ [4] Kirchner, J. W., Aliasing in 1/f(alpha) noise spectra: origins, consequences, and remedies. Phys Rev E Stat Nonlin Soft Matter Phys 71, 066110 (2005).

Raises
------
ValueError
Expand All @@ -60,17 +70,19 @@ def __init__(self, series_list=[], number=1, method=None, label=None, param=None
raise ValueError("2 parameters are needed for this model")
elif method == 'phaseran':
warnings.warn("Phase-randomization is a non parametric method; the provided parameters will be ignored")
elif method in ['fBm','CN'] and nparam >1:
elif method == 'CN' and nparam >1:
raise ValueError(f"1 parameter is needed for this model; only the first of the provided {nparam} will be used")

# refine the display name
if label is None:
if method == 'ar1sim':
self.label = "AR(1) surrogates (MoM)"
self.label = "AR(1) (MoM)"
elif method == 'phaseran':
self.label = "phase-randomized surrogates"
self.label = "phase-randomized"
elif method == 'uar1':
self.label = "AR(1) surrogates (MLE)"
self.label = "AR(1) (MLE)"
elif method == 'CN':
self.label = r'$f^{-\beta}$'
else:
raise ValueError(f"Unknown method: {self.method}. Please use one of {supported_surrogates}")

Expand Down Expand Up @@ -135,6 +147,14 @@ def from_series(self, target_series):
times = np.squeeze(np.tile(target_series.time, (self.number, 1)).T)
# generate matrix
y_surr = tsmodel.uar1_sim(t = times, tau=tau, sigma_2=sigma_2)

elif self.method == 'CN':
tsi = target_series if target_series.is_evenly_spaced() else target_series.interp()
alpha = tsi.spectral(method='cwt').beta_est().beta_est_res['beta'] # fit the parameter using the smoothest spectral method
self.param = alpha
y_surr = np.empty((len(target_series.time),self.number))
for i in range(self.number):
y_surr[:,i] = tsmodel.colored_noise(alpha=alpha, t=target_series.time)

if self.number > 1:
s_list = []
Expand All @@ -158,7 +178,7 @@ def from_param(self, param, length=50, time_pattern = 'even', settings=None):
Parameters
----------
param : list
model parameters (e.g. [tau, sigma0] for an AR(1) model)
model parameters (e.g. [tau, sigma0] for an AR(1) model, [beta] for colored noise)

length : int
Length of the series. Default: 50
Expand All @@ -184,7 +204,7 @@ def from_param(self, param, length=50, time_pattern = 'even', settings=None):
--------

pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator
pyleoclim.utils.tsmodel.uar1_sim : maximum likelihood AR(1) simulator
pyleoclim.utils.tsmodel.colored_noise: simulating from a power law spectrum, $S(f) \propto f^{-\beta}$
pyleoclim.utils.tsmodel.random_time_axis : Generate time increment vector according to a specific probability model

Examples
Expand All @@ -194,7 +214,7 @@ def from_param(self, param, length=50, time_pattern = 'even', settings=None):
ar1.plot_envelope(title=rf'AR(1) synthetic series ($\tau={2},\sigma^2={2}$)')

'''
param = list(param) if param is not None else [] # coerce param into a list, no matter the original format
param = param if isinstance(param, list) else [param] # coerce param into a list, no matter the original format
nparam = len(param)

settings = {} if settings is None else settings.copy()
Expand Down Expand Up @@ -224,12 +244,19 @@ def from_param(self, param, length=50, time_pattern = 'even', settings=None):
# apply surrogate method
y_surr = np.empty_like(times)

if self.method in ['uar1','ar1sim']: #
if nparam<2:
if self.method in ['uar1','ar1sim']: # AR(1) models
if nparam != 2:
warnings.warn(f'The AR(1) model needs 2 parameters, tau and sigma2 (in that order); {nparam} provided. default values used, tau=5, sigma=0.5',UserWarning, stacklevel=2)
param = [5,0.5]
y_surr = tsmodel.uar1_sim(t = times, tau=param[0], sigma_2=param[1])


elif self.method == 'CN': # colored noise
if nparam >1:
raise ValueError(f"1 parameter is needed for this model; only the first of the provided {nparam} will be used")
y_surr = np.empty((length,self.number))
for i in range(self.number):
y_surr[:,i] = tsmodel.colored_noise(alpha=param[0],t=times[:,i])

elif self.method == 'phaseran':
raise ValueError("Phase-randomization is only available in from_series().")

Expand Down
2 changes: 1 addition & 1 deletion pyleoclim/tests/test_core_Coherence.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_plot_t0(self, gen_ts):
fig,ax = coh.plot()
pyleo.closefig(fig)

@pytest.mark.parametrize('method', ['ar1sim','phaseran'])
@pytest.mark.parametrize('method', ['ar1sim','phaseran','uar1','CN'])
def test_plot_t1(self, gen_ts, method):
''' Test Coherence.plot WTC with significance
'''
Expand Down
2 changes: 1 addition & 1 deletion pyleoclim/tests/test_core_PSD.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_plot_t0(self, gen_ts):
class TestUiPsdSignifTest:
''' Tests for PSD.signif_test()
'''
@pytest.mark.parametrize('method',['ar1sim','uar1','ar1asym'])
@pytest.mark.parametrize('method',['ar1sim','uar1','ar1asym','CN'])
def test_signif_test_t0(self,method,gen_ts):
''' Test PSD.signif_test() with default parameters
'''
Expand Down
2 changes: 1 addition & 1 deletion pyleoclim/tests/test_core_Scalogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_signif_test_t0(self, wave_method):
fig,ax = scal_signif.plot(signif_thresh=0.99)
pyleo.closefig(fig)

@pytest.mark.parametrize('method',['ar1sim','uar1','ar1asym'])
@pytest.mark.parametrize('method',['ar1sim','uar1','ar1asym','CN'])
def test_signif_test_t1(self,method):
''' Test scalogram.signif_test() with various surrogates
'''
Expand Down
2 changes: 1 addition & 1 deletion pyleoclim/tests/test_core_Series.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ def test_summary_plot_t1(self, pinkseries):
class TestUISeriesCorrelation:
''' Test Series.correlation()
'''
@pytest.mark.parametrize('sig_method', ['ttest','built-in','ar1sim','phaseran'])
@pytest.mark.parametrize('sig_method', ['ttest','built-in','ar1sim','phaseran','CN'])
@pytest.mark.parametrize('number', [2,5])
def test_correlation_t0a(self, sig_method, number, eps=0.1):
''' Test the various significance methods
Expand Down
33 changes: 26 additions & 7 deletions pyleoclim/tests/test_core_SurrogateSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,38 @@ def test_surrogates_ar1(self, method, param, nsim=10, eps=1, seed = 108):
tau[i], sigma_2[i] = tsmodel.uar1_fit(ts.value, ts.time)
assert np.abs(tau.mean()-param[0]) < eps
assert np.abs(sigma_2.mean()-param[1]) < eps

@pytest.mark.parametrize('method',['ar1sim','uar1','phaseran'])

@pytest.mark.parametrize('param',[1.0,[1.0]])
def test_surrogates_CN(self, param, nsim=10, eps=.1, seed = 108):
''' Generate colored noise surrogates based known parameters,
estimate the parameters from the surrogates series and verify accuracy
'''
CN = pyleo.SurrogateSeries(method='CN', number=nsim, seed=seed)
CN.from_param(param = param, length=200)
beta = np.empty((nsim))
for i, ts in enumerate(CN.series_list):
beta[i] = ts.interp().spectral(method='cwt').beta_est().beta_est_res['beta']
assert np.abs(beta.mean()-1.0) < eps

def test_surrogates_CN2(self, nsim=10):
''' Generate colored noise surrogates with too many params; check error msg
'''
CN = pyleo.SurrogateSeries(method='CN', number=nsim)
with pytest.raises(ValueError):
CN.from_param(param = [2.0, 3.0], length=200) # check that this returns an error

@pytest.mark.parametrize('method',['ar1sim','uar1','phaseran','CN'])
@pytest.mark.parametrize('number',[1,5])
def test_surrogates_match(self, method, number):
''' Test that from_series() work with all methods, matches the original
''' Test that from_series() works with all methods, matches the original
time axis AND that number can be varied
'''
t, v = pyleo.utils.gen_ts(model='colored_noise', nt=100)
ts = pyleo.Series(time = t, value = v, verbose=False, auto_time_params=True)
ar1 = pyleo.SurrogateSeries(method=method, number=number)
ar1.from_series(ts)
assert len(ar1.series_list) == number # test right number
for s in ar1.series_list:
surr = pyleo.SurrogateSeries(method=method, number=number)
surr.from_series(ts)
assert len(surr.series_list) == number # test right number
for s in surr.series_list:
assert_array_equal(s.time, ts.time) # test time axis match

@pytest.mark.parametrize('delta_t',[1,3])
Expand Down
4 changes: 0 additions & 4 deletions pyleoclim/utils/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@

import numpy as np
import scipy.stats as stats
#from scipy.stats import pearsonr
#from scipy.stats.mstats import gmean
#from scipy.stats import t as stu
#from scipy.stats import gaussian_kde
from sklearn import preprocessing
from .tsmodel import ar1_fit_evenly, isopersistent_rn
from .tsutils import phaseran
Expand Down
Loading
Loading