From f56d3916486d4570e6b83a89d0360146b464c4c8 Mon Sep 17 00:00:00 2001 From: khider <11758571+khider@users.noreply.github.com> Date: Thu, 11 Apr 2024 10:03:18 -0700 Subject: [PATCH 01/10] Update series.py --- pyleoclim/core/series.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 3ddd3953..812c0876 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3399,8 +3399,6 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = nsim : int the number of simulations (default: 1000) - method : str, {'ttest','ar1sim','phaseran' (default)} - method for significance testing surr_settings : dict Parameters for surrogate generator. See individual methods for details. @@ -3485,8 +3483,8 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = method = 'ar1sim' settings = {} if settings is None else settings.copy() - corr_args = {'alpha': alpha, 'method': method} - corr_args.update(settings) + #corr_args = {'alpha': alpha, 'method': method} + #corr_args.update(settings) ms = MultipleSeries([self, target_series]) if list(self.time) != list(target_series.time): @@ -3517,18 +3515,25 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = pval = res.pvalue if len(res) > 1 else np.nan signif = pval <= alpha elif method in supported_surrogates: - number = corr_args['nsim'] if 'nsim' in corr_args.keys() else 1000 - seed = corr_args['seed'] if 'seed' in corr_args.keys() else None + if 'nsim' in settings.keys(): + number = settings['nsim'] + settings.pop('nsim') + else: + number = 1000 + + settings['seed'] = seed + settings['method'] = method + settings['number'] = number + #number = corr_args['nsim'] if 'nsim' in corr_args.keys() else 1000 + #seed = corr_args['seed'] if 'seed' in corr_args.keys() else None #method = corr_args['method'] if 'method' in corr_args.keys() else None - surr_settings = corr_args['surr_settings'] if 'surr_settings' in corr_args.keys() else None + #surr_settings = corr_args['surr_settings'] if 'surr_settings' in corr_args.keys() else None # compute correlation statistic stat = corrutils.association(ts0.value,ts1.value,statistic)[0] - ts0_surr = ts0.surrogates(number=number, seed=seed, - method=method, settings=surr_settings) - ts1_surr = ts1.surrogates(number=number, seed=seed, - method=method, settings=surr_settings) + ts0_surr = ts0.surrogates(**settings) + ts1_surr = ts1.surrogates(**settings) stat_surr = np.empty((number)) for i in tqdm(range(number), desc='Evaluating association on surrogate pairs', total=number, disable=mute_pbar): stat_surr[i] = corrutils.association(ts0_surr.series_list[i].value, From 7066bf8e9a91128c1b07aa02e4ede8284482552c Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Sun, 14 Apr 2024 14:26:10 -0700 Subject: [PATCH 02/10] surrogate redesign from_series() and from_param() --- pyleoclim/core/ensembleseries.py | 4 +- pyleoclim/core/series.py | 22 +-- pyleoclim/core/surrogateseries.py | 219 +++++++++++++++++++++++++++--- pyleoclim/utils/tsmodel.py | 31 ++--- 4 files changed, 221 insertions(+), 55 deletions(-) diff --git a/pyleoclim/core/ensembleseries.py b/pyleoclim/core/ensembleseries.py index 107a2181..42f8664c 100644 --- a/pyleoclim/core/ensembleseries.py +++ b/pyleoclim/core/ensembleseries.py @@ -22,8 +22,6 @@ import matplotlib.transforms as transforms import matplotlib as mpl from tqdm import tqdm -#import warnings -#from scipy.stats.mstats import mquantiles class EnsembleSeries(MultipleSeries): ''' EnsembleSeries object @@ -972,7 +970,7 @@ def histplot(self, figsize=[10, 4], title=None, savefig_settings=None, ax=None, ylabel='KDE', vertical=False, edgecolor='w', **plot_kwargs): """ Plots the distribution of the timeseries across ensembles - Reuses seaborn [histplot](https://seaborn.pydata.org/generated/seaborn.histplot.html) function. + Reuses the seaborn [histplot](https://seaborn.pydata.org/generated/seaborn.histplot.html) function. Parameters ---------- diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 812c0876..d1094d79 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -24,9 +24,7 @@ from ..core.scalograms import Scalogram from ..core.coherence import Coherence from ..core.corr import Corr -from ..core.surrogateseries import SurrogateSeries from ..core.resolutions import Resolution -from ..core.surrogateseries import supported_surrogates import seaborn as sns import matplotlib.pyplot as plt @@ -3656,14 +3654,7 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', Parameters ---------- - method : {ar1sim, phaseran, uar1} - The method used to generate surrogates of the timeseries - - Note that phaseran assumes an odd number of samples. If the series - has even length, the last point is dropped to satisfy this requirement - - number : int - The number of surrogates to generate + time_pattern : str {match, even, random} The pattern used to generate the surrogate time axes @@ -3709,7 +3700,7 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', elif time_pattern == "even": if "time_increment" not in settings: warnings.warn("'time_increment' not found in the dictionary, default set to 1.",stacklevel=2) - time_increment = 1 + time_increment = np.median(np.diff(self.time)) else: time_increment = settings["time_increment"] @@ -3749,15 +3740,6 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', # elif method == 'fBm': # # TODO : implement Stochastic - - # THIS SHOULD BE UNNECESSARY - # # reshape - # if len(np.shape(y_surr)) == 1: - # y_surr = y_surr[:, np.newaxis] - - # if len(np.shape(times)) == 1: - # times = times[:, np.newaxis] - # wrap it all up with a bow s_list = [] for i, (t, y) in enumerate(zip(times.T,y_surr.T)): diff --git a/pyleoclim/core/surrogateseries.py b/pyleoclim/core/surrogateseries.py index 1fe4c32c..536baf12 100644 --- a/pyleoclim/core/surrogateseries.py +++ b/pyleoclim/core/surrogateseries.py @@ -1,34 +1,221 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -SurrogateSeries is a child of MultipleSeries, designed for Monte Carlo tests +SurrogateSeries is a child of EnsembleSeries, designed for parametric and non-parametric Monte Carlo tests """ -from ..core.multipleseries import MultipleSeries +from ..core.ensembleseries import EnsembleSeries +from ..core.series import Series +from ..utils import tsutils, tsmodel + +import numpy as np +import warnings supported_surrogates = frozenset(['ar1sim','phaseran', 'uar1']) # broadcast all supported surrogates as global variable, for exception handling -class SurrogateSeries(MultipleSeries): - ''' Object containing surrogate timeseries, usually obtained through recursive modeling (e.g., AR(1)) +class SurrogateSeries(EnsembleSeries): + ''' Object containing surrogate timeseries, obtained by emulating an existing series, or more a statistical model - Surrogate Series is a child of MultipleSeries. All methods available for MultipleSeries are available for surrogate series. - EnsembleSeries would be a more logical choice, but it creates circular imports that break the package. + Surrogate Series is a child of EnsembleSeries. All methods available for EnsembleSeries are available for surrogate series. + ''' - def __init__(self, series_list, label, surrogate_method=None, surrogate_args=None): + def __init__(self, series_list=[], number=1, method=None, label=None): + ''' + Initiatlize a SurrogateSeries object. The parameters below are then used for series generation and labeling. + + Parameters + ---------- + series_list : list + a list of pyleoclim.Series objects. The default is []. + + number : int + The number of surrogates to generate. THe default is 1. + + method : str {ar1sim, phaseran, uar1} + The name of the method used to generate surrogates of the timeseries + + label : str + label of the collection of timeseries (e.g. 'SOI surrogates [AR(1) MLE]') + If not provided, defaults to "series surrogates [{method}]" + + Raises + ------ + ValueError + errors out if an unknown method is specified + + ''' self.series_list = series_list - self.surrogate_method = surrogate_method - self.surrogate_args = surrogate_args + self.method = method + self.number = number # refine the display name - if surrogate_method == 'ar1sim': - self.label = str(label or "series") + " surrogates [AR(1)]" - elif surrogate_method == 'phaseran': + if method == 'ar1sim': + self.label = str(label or "series") + " surrogates [AR(1) MoM]" + elif method == 'phaseran': self.label = str(label or "series") + " surrogates [phase-randomized]" - elif surrogate_method == 'uar1': - self.label = str(label or "series") + " surrogates [uar1]" + elif method == 'uar1': + self.label = str(label or "series") + " surrogates [AR(1) MLE]" else: - raise ValueError('Surrogate method should either be "ar1sim", "phaseran" or "uar1"') + raise ValueError(f"Unknown method: {self.method}. Please use one of {supported_surrogates}") + + def from_series(self, target_series, seed=None): + ''' + Fashion the SurrogateSeries object after a target series + + Parameters + ---------- + target_series : Series object + target Series used to infer surrogate properties + + seed : int + Control random seed option for reproducibility + + Returns + ------- + surr : SurrogateSeries + + See also + -------- + + pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator + pyleoclim.utils.tsmodel.uar1_sim : maximum likelihood AR(1) simulator + pyleoclim.utils.tsutils.phaseran2 : phase randomization + + Examples + -------- + + SOI = pyleo.utils.load_dataset('SOI') + SOI_surr = pyleo.SurrogateSeries(method='phaseran', number=10) + SOI_surr.from_series(SOI) + + ''' + #settings = {} if settings is None else settings.copy() + + if not isinstance(target_series, Series): + raise TypeError(f"Expected pyleo.Series, got: {type(target_series)}") + + if seed is not None: + np.random.seed(seed) + + # generate time axes according to provided pattern + times = np.tile(target_series.time, (self.number, 1)).T + + # apply surrogate method + if self.method == 'ar1sim': + y_surr = tsmodel.ar1_sim(target_series.value, self.number, target_series.time) + + elif self.method == 'phaseran': + if target_series.is_evenly_spaced(): + y_surr = tsutils.phaseran2(target_series.value, self.number) + else: + raise ValueError("Phase-randomization presently requires evenly-spaced series.") + + elif self.method == 'uar1': + # estimate theta with MLE + theta_hat = tsmodel.uar1_fit(target_series.value, target_series.time) + # generate surrogates + y_surr = np.empty_like(times) + for j in range(self.number): + y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], + tau_0=theta_hat[0],sigma_2_0=theta_hat[1]) + + s_list = [] + for i, (t, y) in enumerate(zip(times.T,y_surr.T)): + ts = Series(time=t, value=y, + time_name=target_series.time_name, + time_unit=target_series.time_unit, + value_name=target_series.value_name, + value_unit=target_series.value_unit, + label = str(target_series.label or '') + " surr #" + str(i+1), + verbose=False, auto_time_params=False) + s_list.append(ts) + + self.series_list = s_list + + def from_params(self, params, length=50, time_pattern = 'even', seed=None, settings=None): + ''' + Simulate the SurrogateSeries from a given probability model + + Parameters + ---------- + params : list + model parameters (e.g. [tau, sigma0] for an AR(1) model) - + length : int + Length of the series. Default: 50 + + time_pattern : str {match, even, random} + The pattern used to generate the surrogate time axes + 'even' uses an evenly-spaced time with spacing `delta_t` specified in settings (if not specified, defaults to 1) + 'random' uses random_time_index() with specified distribution and parameters (default: 'exponential' with parameter 1) + 'custom' + seed : int + Control random seed option for reproducibility + + settings : dict + Parameters for surrogate generator. See individual methods for details. + + Returns + ------- + surr : SurrogateSeries + + See also + -------- + + pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator + pyleoclim.utils.tsmodel.uar1_sim : maximum likelihood AR(1) simulator + + Examples + -------- + ar1 = pyleo.SurrogateSeries(method='ar1sim', number=10) + ar1.from_process(length=100) + + ''' + params = list(params) # coerce params into a list, no matter the original format + settings = {} if settings is None else settings.copy() + + if seed is not None: + np.random.seed(seed) + + # generate time axes according to provided pattern + if time_pattern == "even": + time_increment = settings["time_increment"] if "time_increment" in settings else 1 + t = np.cumsum([time_increment]*length) + times = np.tile(t, (self.number, 1)).T + elif time_pattern == "random": + times = np.zeros((length, self.number)) + for i in range(self.number): + dist_name = settings['delta_t_dist'] if "delta_t_dist" in settings else "exponential" + dist_param = settings['param'] if "param" in settings else [1] + times[:, i] = tsmodel.random_time_index(length, dist_name,dist_param) + elif time_pattern == 'specified': + if "time" not in settings: + raise ValueError("'time' not found in settings") + else: + times = np.tile(settings["time"], (self.number, 1)).T + else: + raise ValueError(f"Unknown time pattern: {time_pattern}") + # apply surrogate method + if self.method == 'ar1sim' or 'uar1': + y_surr = np.empty_like(times) + if len(params)<2: + raise ValueError('The AR(1) model needs 2 paramaters, tau and sigma2') + # generate surrogates + for j in range(self.number): + y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], + tau=params[0],sigma_2=params[1]) + elif self.method == 'phaseran': + raise ValueError("Phase-randomization is only available in from_series().") + + # create the series_list + s_list = [] + for i, (t, y) in enumerate(zip(times.T,y_surr.T)): + ts = Series(time=t, value=y, + label = str(self.label or '') + " surr #" + str(i+1), + verbose=False, auto_time_params=False) + s_list.append(ts) + + self.series_list = s_list + diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index d003483d..d36c9af0 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -14,7 +14,7 @@ is_evenly_spaced ) from scipy import optimize -from scipy.optimize import minimize # for MLE estimation of tau_0 +from scipy.optimize import minimize # for MLE estimation of tau __all__ = [ @@ -626,7 +626,7 @@ def n_ll_unevenly_spaced_ar1(theta, y, t): Parameters ---------- theta: array, length 2 - the first value is tau_0, the second value sigma^2. + the first value is tau, the second value sigma^2. y: array,length n The vector of observations. @@ -638,12 +638,12 @@ def n_ll_unevenly_spaced_ar1(theta, y, t): """ # define n n = len(y) - log_tau_0 = theta[0] - log_sigma_2_0 = theta[1] - tau_0 = np.exp(log_tau_0) - sigma_2 = np.exp(log_sigma_2_0) + log_tau = theta[0] + log_sigma_2 = theta[1] + tau = np.exp(log_tau) + sigma_2 = np.exp(log_sigma_2) delta = np.diff(t) - phi = np.exp((-delta / tau_0)) + phi = np.exp((-delta / tau)) term_1 = y[1:n] - (phi * y[0:(n-1)]) term_2 = pow(term_1, 2) term_3 = term_2 / (1- pow(phi, 2)) @@ -654,7 +654,7 @@ def n_ll_unevenly_spaced_ar1(theta, y, t): def uar1_fit(y, t): ''' - Maximum Likelihood Estimation of parameters tau_0 and sigma_2_0 + Maximum Likelihood Estimation of parameters tau and sigma_2 Parameters ---------- @@ -664,13 +664,13 @@ def uar1_fit(y, t): Time index values of the time series Returns: - An array containing the estimated parameters tau_0_hat and sigma_2_hat, first entry is tau_0_hat, second entry is sigma_2_hat + An array containing the estimated parameters tau_hat and sigma_2_hat, first entry is tau_hat, second entry is sigma_2_hat ------- None. ''' - # obtain initial value for tau_0 + # obtain initial value for tau tau_initial_value = tau_estimation(y= y, t = t) # obtain initial value for sifma_2_0 sigma_2_initial_value = np.var(y) @@ -681,7 +681,7 @@ def uar1_fit(y, t): return theta_hat -def uar1_sim(t, tau_0=5, sigma_2_0=2): +def uar1_sim(t, tau=5, sigma_2=2): """ Generate a time series of length n from an autoregressive process of order 1 with evenly/unevenly spaced time points. @@ -691,10 +691,10 @@ def uar1_sim(t, tau_0=5, sigma_2_0=2): t : array Time axis - tau_0 : float + tau : float Time decay parameter of the AR(1) model ($\phi = e^{-\tau}$) - sigma_2_0 : float + sigma_2 : float Variance of the innovations Returns @@ -717,9 +717,8 @@ def uar1_sim(t, tau_0=5, sigma_2_0=2): ys[0] = z[0] for i in range(n-1): delta_i = t[i+1] - t[i] - phi_i = np.exp(-delta_i / tau_0) - sigma_2_i = sigma_2_0 * (1-pow(phi_i, 2)) - sigma_i = np.sqrt(sigma_2_i) + phi_i = np.exp(-delta_i / tau) + sigma_i = np.sqrt(sigma_2 * (1-pow(phi_i, 2))) ys[i+1] = phi_i * ys[i] + sigma_i * z[i] return ys From 15592fa0154e0e1b4941b7f299076d2012b70c1a Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Mon, 15 Apr 2024 23:39:48 -0700 Subject: [PATCH 03/10] surrogate series methods + docstrings fixed bugs in uar1_sim and EnsembleSeries.plot_traces() --- pyleoclim/core/ensembleseries.py | 2 ++ pyleoclim/core/surrogateseries.py | 59 +++++++++++++++++-------------- pyleoclim/utils/tsmodel.py | 40 ++++++++++++--------- 3 files changed, 58 insertions(+), 43 deletions(-) diff --git a/pyleoclim/core/ensembleseries.py b/pyleoclim/core/ensembleseries.py index 42f8664c..062e8ceb 100644 --- a/pyleoclim/core/ensembleseries.py +++ b/pyleoclim/core/ensembleseries.py @@ -522,6 +522,8 @@ def plot_traces(self, figsize=[10, 4], xlabel=None, ylabel=None, title=None, num ''' savefig_settings = {} if savefig_settings is None else savefig_settings.copy() lgd_kwargs = {} if lgd_kwargs is None else lgd_kwargs.copy() + + num_traces = min(num_traces, len(self.series_list)) # restrict to the smaller of the two # generate default axis labels time_label, value_label = self.make_labels() diff --git a/pyleoclim/core/surrogateseries.py b/pyleoclim/core/surrogateseries.py index 536baf12..53d0d363 100644 --- a/pyleoclim/core/surrogateseries.py +++ b/pyleoclim/core/surrogateseries.py @@ -85,8 +85,12 @@ def from_series(self, target_series, seed=None): -------- SOI = pyleo.utils.load_dataset('SOI') - SOI_surr = pyleo.SurrogateSeries(method='phaseran', number=10) + SOI_surr = pyleo.SurrogateSeries(method='phaseran', number=4) SOI_surr.from_series(SOI) + fig, ax = SOI_surr.plot_traces() + SOI.plot(ax=ax,color='black',linewidth=1, ylim=[-6,3]) + ax.legend(loc='lower center', bbox_to_anchor=(0.5, 0), ncol=2) + ax.set_title(SOI_surr.label) ''' #settings = {} if settings is None else settings.copy() @@ -112,13 +116,9 @@ def from_series(self, target_series, seed=None): elif self.method == 'uar1': # estimate theta with MLE - theta_hat = tsmodel.uar1_fit(target_series.value, target_series.time) - # generate surrogates - y_surr = np.empty_like(times) - for j in range(self.number): - y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], - tau_0=theta_hat[0],sigma_2_0=theta_hat[1]) - + tau, sigma_2 = tsmodel.uar1_fit(target_series.value, target_series.time) + y_surr = tsmodel.uar1_sim(t = times, tau=tau, sigma_2=sigma_2) + s_list = [] for i, (t, y) in enumerate(zip(times.T,y_surr.T)): ts = Series(time=t, value=y, @@ -144,11 +144,13 @@ def from_params(self, params, length=50, time_pattern = 'even', seed=None, setti length : int Length of the series. Default: 50 - time_pattern : str {match, even, random} + time_pattern : str {even, random, specified} The pattern used to generate the surrogate time axes - 'even' uses an evenly-spaced time with spacing `delta_t` specified in settings (if not specified, defaults to 1) - 'random' uses random_time_index() with specified distribution and parameters (default: 'exponential' with parameter 1) - 'custom' + * 'even' uses an evenly-spaced time with spacing `delta_t` specified in settings (if not specified, defaults to 1.0) + * 'random' uses random_time_index() with specified distribution and parameters + Relevant settings are `delta_t_dist` and `param`. (default: 'exponential' with parameter = 1) + * 'specified': uses time axis `time` from `settings`. + seed : int Control random seed option for reproducibility @@ -164,14 +166,18 @@ def from_params(self, params, length=50, time_pattern = 'even', seed=None, setti pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator pyleoclim.utils.tsmodel.uar1_sim : maximum likelihood AR(1) simulator + pyleoclim.utils.tsmodel.random_time_index : Generate time increment vector according to a specific probability model Examples -------- ar1 = pyleo.SurrogateSeries(method='ar1sim', number=10) - ar1.from_process(length=100) + ar1.from_params(length=100, params = [2,2]) + ar1.plot_envelope(title=rf'AR(1) synthetic series ($\tau={2},\sigma^2={2}$)') ''' - params = list(params) # coerce params into a list, no matter the original format + params = list(params) if params is not None else [] # coerce params into a list, no matter the original format + nparams = len(params) + settings = {} if settings is None else settings.copy() if seed is not None: @@ -179,8 +185,8 @@ def from_params(self, params, length=50, time_pattern = 'even', seed=None, setti # generate time axes according to provided pattern if time_pattern == "even": - time_increment = settings["time_increment"] if "time_increment" in settings else 1 - t = np.cumsum([time_increment]*length) + delta_t = settings["delta_t"] if "delta_t" in settings else 1.0 + t = np.cumsum([float(delta_t)]*length) times = np.tile(t, (self.number, 1)).T elif time_pattern == "random": times = np.zeros((length, self.number)) @@ -195,16 +201,17 @@ def from_params(self, params, length=50, time_pattern = 'even', seed=None, setti times = np.tile(settings["time"], (self.number, 1)).T else: raise ValueError(f"Unknown time pattern: {time_pattern}") - + + # apply surrogate method - if self.method == 'ar1sim' or 'uar1': - y_surr = np.empty_like(times) - if len(params)<2: - raise ValueError('The AR(1) model needs 2 paramaters, tau and sigma2') - # generate surrogates - for j in range(self.number): - y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], - tau=params[0],sigma_2=params[1]) + y_surr = np.empty_like(times) + + if self.method == 'uar1' or 'ar1sim': + if nparams<2: + warnings.warn(f'The AR(1) model needs 2 parameters, tau and sigma2 (in that order); {nparams} provided. default values used',UserWarning, stacklevel=2) + params = [5,0.5] + y_surr = tsmodel.uar1_sim(t = times, tau=params[0], sigma_2=params[1]) + elif self.method == 'phaseran': raise ValueError("Phase-randomization is only available in from_series().") @@ -213,7 +220,7 @@ def from_params(self, params, length=50, time_pattern = 'even', seed=None, setti for i, (t, y) in enumerate(zip(times.T,y_surr.T)): ts = Series(time=t, value=y, label = str(self.label or '') + " surr #" + str(i+1), - verbose=False, auto_time_params=False) + verbose=False, auto_time_params=True) s_list.append(ts) self.series_list = s_list diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index d36c9af0..07d98caf 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -151,7 +151,7 @@ def ar1_sim(y, p, t=None): n = np.size(y) ysim = np.empty(shape=(n, p)) # declare array - sig = np.std(y) + sig = np.std(y) # Not MLE estimate in any case if is_evenly_spaced(t): g = ar1_fit_evenly(y) @@ -179,7 +179,7 @@ def ar1_sim(y, p, t=None): def gen_ar1_evenly(t, g, scale=1, burnin=50): ''' Generate AR(1) series samples - MARK FOR DEPRECATION once ar1fit_ml is adopted + MARK FOR DEPRECATION once uar1_fit is adopted Wrapper for the function `statsmodels.tsa.arima_process.arma_generate_sample `_. used to generate an ARMA @@ -194,7 +194,7 @@ def gen_ar1_evenly(t, g, scale=1, burnin=50): lag-1 autocorrelation scale : float - The standard deviation of noise. + The standard deviation of the noise. burnin : int Number of observation at the beginning of the sample to drop. Used to reduce dependence on initial values. @@ -681,7 +681,7 @@ def uar1_fit(y, t): return theta_hat -def uar1_sim(t, tau=5, sigma_2=2): +def uar1_sim(t, tau, sigma_2=1): """ Generate a time series of length n from an autoregressive process of order 1 with evenly/unevenly spaced time points. @@ -707,21 +707,27 @@ def uar1_sim(t, tau=5, sigma_2=2): -------- pyleoclim.utils.tsmodel.uar1_fit : Maximumum likelihood estimate of AR(1) parameters - pyleoclim.utils.tsmodel.random_time_index : Generate time increment vector according to a specific probability model """ - n = len(t) - ys = np.zeros(n) # create empty vector - # generate unevenly spaced AR(1) - z = np.random.normal(loc=0, scale=1, size=n) - ys[0] = z[0] - for i in range(n-1): - delta_i = t[i+1] - t[i] - phi_i = np.exp(-delta_i / tau) - sigma_i = np.sqrt(sigma_2 * (1-pow(phi_i, 2))) - ys[i+1] = phi_i * ys[i] + sigma_i * z[i] - - return ys + if t.ndim == 1: # add extraneous dimension if t is 1d, to write only one loop. + n = len(t); p = 1 + t = t[:,np.newaxis] + else: + n, p = t.shape + + # generate innovations + z = np.random.normal(loc=0, scale=1, size=(n,p)) + y = np.copy(z) # initialize AR(1) vectors + # fill the array + for j in range(p): # Note: this shouldn't work but it does! + for i in range(1, n): + delta_i = t[i] - t[i-1] + phi = np.exp(-delta_i / tau) + sigma_i = np.sqrt(sigma_2 * (1-phi**2)) + y[i] = phi * y[i-1] + sigma_i * z[i] + + y = np.squeeze(y) # squeeze superfluous dimensions + return y def inverse_cumsum(arr): return np.diff(np.concatenate(([0], arr))) From 8f56ed728aa38a1ec2c2ce6544a6ba3e755f0ca9 Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Tue, 16 Apr 2024 09:54:23 -0700 Subject: [PATCH 04/10] removed surrogate method from Series class --- pyleoclim/core/series.py | 202 +++++++++---------- pyleoclim/tests/test_core_Series.py | 61 +----- pyleoclim/tests/test_core_SurrogateSeries.py | 79 ++++++++ 3 files changed, 181 insertions(+), 161 deletions(-) create mode 100644 pyleoclim/tests/test_core_SurrogateSeries.py diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index d1094d79..7dd38b67 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3645,119 +3645,119 @@ def causality(self, target_series, method='liang', timespan=None, settings=None, causal_res = spec_func[method](value1, value2, **args[method]) return causal_res - def surrogates(self, method='ar1sim', number=1, time_pattern='match', - length=None, seed=None, settings=None): - ''' Generate surrogates of the Series object according to "method" + # def surrogates(self, method='ar1sim', number=1, time_pattern='match', + # length=None, seed=None, settings=None): + # ''' Generate surrogates of the Series object according to "method" - For now, assumes uniform spacing and increasing time axis + # For now, assumes uniform spacing and increasing time axis - Parameters - ---------- + # Parameters + # ---------- - time_pattern : str {match, even, random} - The pattern used to generate the surrogate time axes - 'match' uses the same pattern as the original Series - 'even' uses an evenly-spaced time with spacing delta_t specified in settings (will return error if not specified) - 'random' uses random_time_index() with specified distribution and parameters (default: 'exponential' with parameter 1) + # time_pattern : str {match, even, random} + # The pattern used to generate the surrogate time axes + # 'match' uses the same pattern as the original Series + # 'even' uses an evenly-spaced time with spacing delta_t specified in settings (will return error if not specified) + # 'random' uses random_time_index() with specified distribution and parameters (default: 'exponential' with parameter 1) - length : int - Length of the series + # length : int + # Length of the series - seed : int - Control seed option for reproducibility + # seed : int + # Control seed option for reproducibility - settings : dict - Parameters for surrogate generator. See individual methods for details. + # settings : dict + # Parameters for surrogate generator. See individual methods for details. - Returns - ------- - surr : SurrogateSeries + # Returns + # ------- + # surr : SurrogateSeries - See also - -------- - - pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator - pyleoclim.utils.tsmodel.uar1_sim : maximum likelihood AR(1) simulator - pyleoclim.utils.tsutils.phaseran2 : phase randomization - pyleoclim.utils.tsutils.random_time_index : random time index vector according to a specific probability model - - ''' - settings = {} if settings is None else settings.copy() - - if seed is not None: - np.random.seed(seed) - - if length is None: - n = len(self.value) - else: - n = length - - # generate time axes according to provided pattern - if time_pattern == "match": - times = np.tile(self.time, (number, 1)).T - elif time_pattern == "even": - if "time_increment" not in settings: - warnings.warn("'time_increment' not found in the dictionary, default set to 1.",stacklevel=2) - time_increment = np.median(np.diff(self.time)) - else: - time_increment = settings["time_increment"] - - t = np.cumsum([time_increment]*n) - times = np.tile(t, (number, 1)).T - elif time_pattern == "random": - times = np.zeros((n, number)) - for i in range(number): - times[:, i] = tsmodel.random_time_index(n = n, **settings) # TODO: check that this does not break when unexpected keywords are passed in `settings` - else: - raise ValueError(f"Unknown time pattern: {time_pattern}") + # See also + # -------- - # apply surrogate method - if method == 'ar1sim': - if time_pattern != 'match': - raise ValueError('Only a matching time pattern is supported with this method') - else: - y_surr = tsmodel.ar1_sim(self.value, number, self.time) # CHECK: how does this handle the new time? + # pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator + # pyleoclim.utils.tsmodel.uar1_sim : maximum likelihood AR(1) simulator + # pyleoclim.utils.tsutils.phaseran2 : phase randomization + # pyleoclim.utils.tsutils.random_time_index : random time index vector according to a specific probability model - elif method == 'phaseran': - if self.is_evenly_spaced() and time_pattern != "random": - y_surr = tsutils.phaseran2(self.value, number) - else: - raise ValueError("Phase-randomization presently requires evenly-spaced series.") - - elif method == 'uar1': - # estimate theta with MLE - theta_hat = tsmodel.uar1_fit(self.value, self.time) - # generate surrogates - y_surr = np.empty_like(times) - for j in range(number): - y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], - tau_0=theta_hat[0],sigma_2_0=theta_hat[1]) - - # elif method == 'power-law': - # # TODO : implement Stochastic - # elif method == 'fBm': - # # TODO : implement Stochastic - - # wrap it all up with a bow - s_list = [] - for i, (t, y) in enumerate(zip(times.T,y_surr.T)): - ts = Series(time=t, value=y, - time_name=self.time_name, - time_unit=self.time_unit, - value_name=self.value_name, - value_unit=self.value_unit, - label = str(self.label or '') + " surr #" + str(i+1), - verbose=False, auto_time_params=True) - s_list.append(ts) - - surr = SurrogateSeries(series_list=s_list, - label = self.label, - surrogate_method=method, - surrogate_args=settings) - - return surr + # ''' + # settings = {} if settings is None else settings.copy() + + # if seed is not None: + # np.random.seed(seed) + + # if length is None: + # n = len(self.value) + # else: + # n = length + + # # generate time axes according to provided pattern + # if time_pattern == "match": + # times = np.tile(self.time, (number, 1)).T + # elif time_pattern == "even": + # if "time_increment" not in settings: + # warnings.warn("'time_increment' not found in the dictionary, default set to 1.",stacklevel=2) + # time_increment = np.median(np.diff(self.time)) + # else: + # time_increment = settings["time_increment"] + + # t = np.cumsum([time_increment]*n) + # times = np.tile(t, (number, 1)).T + # elif time_pattern == "random": + # times = np.zeros((n, number)) + # for i in range(number): + # times[:, i] = tsmodel.random_time_index(n = n, **settings) # TODO: check that this does not break when unexpected keywords are passed in `settings` + # else: + # raise ValueError(f"Unknown time pattern: {time_pattern}") + + # # apply surrogate method + # if method == 'ar1sim': + # if time_pattern != 'match': + # raise ValueError('Only a matching time pattern is supported with this method') + # else: + # y_surr = tsmodel.ar1_sim(self.value, number, self.time) # CHECK: how does this handle the new time? + + # elif method == 'phaseran': + # if self.is_evenly_spaced() and time_pattern != "random": + # y_surr = tsutils.phaseran2(self.value, number) + # else: + # raise ValueError("Phase-randomization presently requires evenly-spaced series.") + + # elif method == 'uar1': + # # estimate theta with MLE + # theta_hat = tsmodel.uar1_fit(self.value, self.time) + # # generate surrogates + # y_surr = np.empty_like(times) + # for j in range(number): + # y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], + # tau_0=theta_hat[0],sigma_2_0=theta_hat[1]) + + # # elif method == 'power-law': + # # # TODO : implement Stochastic + # # elif method == 'fBm': + # # # TODO : implement Stochastic + + # # wrap it all up with a bow + # s_list = [] + # for i, (t, y) in enumerate(zip(times.T,y_surr.T)): + # ts = Series(time=t, value=y, + # time_name=self.time_name, + # time_unit=self.time_unit, + # value_name=self.value_name, + # value_unit=self.value_unit, + # label = str(self.label or '') + " surr #" + str(i+1), + # verbose=False, auto_time_params=True) + # s_list.append(ts) + + # surr = SurrogateSeries(series_list=s_list, + # label = self.label, + # surrogate_method=method, + # surrogate_args=settings) + + # return surr def outliers(self,method='kmeans',remove=True, settings=None, fig_outliers=True, figsize_outliers=[10,4], plotoutliers_kwargs=None, savefigoutliers_settings=None, diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index a3c858b7..60512e9f 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -537,66 +537,7 @@ def test_invalid(self): ts.sel(time=1, value=1) -class TestUISeriesSurrogates: - ''' Test Series.surrogates() - ''' - def test_surrogates_t0(self, p=2, eps=0.2): - ''' Generate AR(1) surrogates based on a AR(1) series with certain parameters, - and then evaluate and assert the parameters of the surrogates are correct - ''' - g = 0.5 # persistence - ar = [1, -g] - ma = [1, 0] - n = 1000 - ar1 = arma_generate_sample(ar, ma, nsample=n, scale=1) - ts = pyleo.Series(time=np.arange(1000), value=ar1) - - ts_surrs = ts.surrogates(number=p) - for ts_surr in ts_surrs.series_list: - g_surr = tsmodel.ar1_fit(ts_surr.value) - assert np.abs(g_surr-g) < eps - - @pytest.mark.parametrize('number',[1,5]) - def test_surrogates_uar1_match(self, number): - ts = gen_ts(nt=550, alpha=1.0) - # generate surrogates - surr = ts.surrogates(method = 'uar1', number = number, time_pattern ="match") - for i in range(number): - assert(np.allclose(surr.series_list[i].time, ts.time)) - - def test_surrogates_uar1_even(self, p=5): - ts = gen_ts(nt=550, alpha=1.0) - time_incr = np.median(np.diff(ts.time)) - # generate surrogates - surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="even", settings={"time_increment" :time_incr}) - for i in range(p): - assert(np.allclose(tsmodel.inverse_cumsum(surr.series_list[i].time),time_incr)) - - def test_surrogates_uar1_random(self, p=5, tol = 0.5): - tau = 2 - sigma_2 = 1 - n = 500 - # generate time index - t = np.arange(1,(n+1)) - # create time series - ys = tsmodel.uar1_sim(t, tau_0=tau, sigma_2_0=sigma_2) - ts = pyleo.Series(time = t, value=ys, auto_time_params=True,verbose=False) - # generate surrogates default is exponential with parameter value 1 - surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="random") - #surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven",settings={"delta_t_dist" :"poisson","param":[1]} ) - - for i in range(p): - delta_t = tsmodel.inverse_cumsum(surr.series_list[i].time) - # Compute the empirical cumulative distribution function (CDF) of the generated data - empirical_cdf, bins = np.histogram(delta_t, bins=100, density=True) - empirical_cdf = np.cumsum(empirical_cdf) * np.diff(bins) - # Compute the theoretical CDF of the Exponential distribution - theoretical_cdf = expon.cdf(bins[1:], scale=1) - # Trim theoretical_cdf to match the size of empirical_cdf - theoretical_cdf = theoretical_cdf[:len(empirical_cdf)] - # Compute the L2 norm (Euclidean distance) between empirical and theoretical CDFs - l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf) - assert(l2_norm Date: Tue, 16 Apr 2024 12:43:12 -0700 Subject: [PATCH 05/10] first pytest for test_core_SurrogateSeries.py --- pyleoclim/tests/test_core_Series.py | 6 +- pyleoclim/tests/test_core_SurrogateSeries.py | 107 ++++++++++--------- 2 files changed, 59 insertions(+), 54 deletions(-) diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index 60512e9f..7e1d940d 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -28,11 +28,11 @@ #from urllib.request import urlopen import pyleoclim as pyleo -import pyleoclim.utils.tsmodel as tsmodel +#import pyleoclim.utils.tsmodel as tsmodel import pyleoclim.utils.tsbase as tsbase -from statsmodels.tsa.arima_process import arma_generate_sample -from scipy.stats import expon +#from statsmodels.tsa.arima_process import arma_generate_sample +#from scipy.stats import expon # a collection of useful functions diff --git a/pyleoclim/tests/test_core_SurrogateSeries.py b/pyleoclim/tests/test_core_SurrogateSeries.py index f4c93034..8df7de39 100644 --- a/pyleoclim/tests/test_core_SurrogateSeries.py +++ b/pyleoclim/tests/test_core_SurrogateSeries.py @@ -14,66 +14,71 @@ ''' import numpy as np import pytest - +import pyleoclim as pyleo +import pyleoclim.utils.tsmodel as tsmodel class TestUISurrogatesSeries: ''' Tests for pyleoclim.core.ui.SurrogateSeries ''' @pytest.mark.parametrize('method',['ar1sim','uar1']) - def test_surrogates_ar1(self, method, p=5, eps=0.2, seed = 108): + @pytest.mark.parametrize('params',[[5,1],[2,2]]) + def test_surrogates_ar1(self, method, params, nsim=10, eps=0.3, seed = 108): ''' Generate AR(1) surrogates based on a AR(1) series with certain parameters, estimate the parameters from the surrogates series and verify accuracy ''' - g = 0.5 # persistence - ar = [1, -g] - ma = [1, 0] - n = 1000 - ar1 = arma_generate_sample(ar, ma, nsample=n, scale=1) - ts = pyleo.Series(time=np.arange(1000), value=ar1) - - ts_surrs = ts.surrogates(number=p) - for ts_surr in ts_surrs.series_list: - g_surr = tsmodel.ar1_fit(ts_surr.value) - assert np.abs(g_surr-g) < eps + #t, v = pyleo.utils.gen_ts(model='ar1') + #ts = pyleo.Series(time =t, value = v, verbose=False, auto_time_params=True) + + ar1 = pyleo.SurrogateSeries(method='ar1sim', number=nsim) + ar1.from_params(params = params, seed=seed) + tau = np.empty((nsim)) + sigma_2 = np.empty_like(tau) + for i, ts in enumerate(ar1.series_list): + tau[i], sigma_2[i] = tsmodel.uar1_fit(ts.value, ts.time) + assert np.abs(tau.mean()-params[0]) < eps + assert np.abs(sigma_2.mean()-params[1]) < eps - @pytest.mark.parametrize('number',[1,5]) - def test_surrogates_uar1_match(self, number): - ts = gen_ts(nt=550, alpha=1.0) - # generate surrogates - surr = ts.surrogates(method = 'uar1', number = number, time_pattern ="match") - for i in range(number): - assert(np.allclose(surr.series_list[i].time, ts.time)) + # @pytest.mark.parametrize('number',[1,5]) + # def test_surrogates_uar1_match(self, number): + # t, v = pyleo.utils.gen_ts(model='ar1') + # ts = pyleo.Series(time =t, value = v, verbose=False, auto_time_params=True) + # ar1 = pyleo.SurrogateSeries(method='ar1sim', number=number) + # ar1.from_series(ts) + # # generate surrogates + # surr = ts.surrogates(method = 'uar1', number = number, time_pattern ="match") + # for i in range(number): + # assert(np.allclose(surr.series_list[i].time, ts.time)) - def test_surrogates_uar1_even(self, p=5): - ts = gen_ts(nt=550, alpha=1.0) - time_incr = np.median(np.diff(ts.time)) - # generate surrogates - surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="even", settings={"time_increment" :time_incr}) - for i in range(p): - assert(np.allclose(tsmodel.inverse_cumsum(surr.series_list[i].time),time_incr)) + # def test_surrogates_uar1_even(self, p=5): + # ts = gen_ts(nt=550, alpha=1.0) + # time_incr = np.median(np.diff(ts.time)) + # # generate surrogates + # surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="even", settings={"time_increment" :time_incr}) + # for i in range(p): + # assert(np.allclose(tsmodel.inverse_cumsum(surr.series_list[i].time),time_incr)) - def test_surrogates_uar1_random(self, p=5, tol = 0.5): - tau = 2 - sigma_2 = 1 - n = 500 - # generate time index - t = np.arange(1,(n+1)) - # create time series - ys = tsmodel.uar1_sim(t, tau_0=tau, sigma_2_0=sigma_2) - ts = pyleo.Series(time = t, value=ys, auto_time_params=True,verbose=False) - # generate surrogates default is exponential with parameter value 1 - surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="random") - #surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven",settings={"delta_t_dist" :"poisson","param":[1]} ) + # def test_surrogates_uar1_random(self, p=5, tol = 0.5): + # tau = 2 + # sigma_2 = 1 + # n = 500 + # # generate time index + # t = np.arange(1,(n+1)) + # # create time series + # ys = tsmodel.uar1_sim(t, tau_0=tau, sigma_2_0=sigma_2) + # ts = pyleo.Series(time = t, value=ys, auto_time_params=True,verbose=False) + # # generate surrogates default is exponential with parameter value 1 + # surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="random") + # #surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven",settings={"delta_t_dist" :"poisson","param":[1]} ) - for i in range(p): - delta_t = tsmodel.inverse_cumsum(surr.series_list[i].time) - # Compute the empirical cumulative distribution function (CDF) of the generated data - empirical_cdf, bins = np.histogram(delta_t, bins=100, density=True) - empirical_cdf = np.cumsum(empirical_cdf) * np.diff(bins) - # Compute the theoretical CDF of the Exponential distribution - theoretical_cdf = expon.cdf(bins[1:], scale=1) - # Trim theoretical_cdf to match the size of empirical_cdf - theoretical_cdf = theoretical_cdf[:len(empirical_cdf)] - # Compute the L2 norm (Euclidean distance) between empirical and theoretical CDFs - l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf) - assert(l2_norm Date: Tue, 16 Apr 2024 16:56:03 -0700 Subject: [PATCH 06/10] extensive tests (and debugging) for SurrogateSeries --- pyleoclim/core/surrogateseries.py | 61 ++++---- pyleoclim/tests/test_core_SurrogateSeries.py | 151 +++++++++++++------ pyleoclim/tests/test_utils_tsmodel.py | 11 +- pyleoclim/utils/tsmodel.py | 6 +- 4 files changed, 146 insertions(+), 83 deletions(-) diff --git a/pyleoclim/core/surrogateseries.py b/pyleoclim/core/surrogateseries.py index 53d0d363..89997d7e 100644 --- a/pyleoclim/core/surrogateseries.py +++ b/pyleoclim/core/surrogateseries.py @@ -14,7 +14,8 @@ supported_surrogates = frozenset(['ar1sim','phaseran', 'uar1']) # broadcast all supported surrogates as global variable, for exception handling class SurrogateSeries(EnsembleSeries): - ''' Object containing surrogate timeseries, obtained by emulating an existing series, or more a statistical model + ''' Object containing surrogate timeseries, obtained either by emulating an + existing series (see `from_series()`) or from a parametric model (see `from_params()`) Surrogate Series is a child of EnsembleSeries. All methods available for EnsembleSeries are available for surrogate series. @@ -49,15 +50,16 @@ def __init__(self, series_list=[], number=1, method=None, label=None): self.number = number # refine the display name - if method == 'ar1sim': - self.label = str(label or "series") + " surrogates [AR(1) MoM]" - elif method == 'phaseran': - self.label = str(label or "series") + " surrogates [phase-randomized]" - elif method == 'uar1': - self.label = str(label or "series") + " surrogates [AR(1) MLE]" - else: - raise ValueError(f"Unknown method: {self.method}. Please use one of {supported_surrogates}") - + if label is None: + if method == 'ar1sim': + self.label = "AR(1) surrogates (MoM)" + elif method == 'phaseran': + self.label = "phase-randomized surrogates" + elif method == 'uar1': + self.label = "AR(1) surrogates (MLE)" + else: + raise ValueError(f"Unknown method: {self.method}. Please use one of {supported_surrogates}") + def from_series(self, target_series, seed=None): ''' Fashion the SurrogateSeries object after a target series @@ -101,9 +103,6 @@ def from_series(self, target_series, seed=None): if seed is not None: np.random.seed(seed) - # generate time axes according to provided pattern - times = np.tile(target_series.time, (self.number, 1)).T - # apply surrogate method if self.method == 'ar1sim': y_surr = tsmodel.ar1_sim(target_series.value, self.number, target_series.time) @@ -117,18 +116,23 @@ def from_series(self, target_series, seed=None): elif self.method == 'uar1': # estimate theta with MLE tau, sigma_2 = tsmodel.uar1_fit(target_series.value, target_series.time) + # generate time axes according to provided pattern + times = np.squeeze(np.tile(target_series.time, (self.number, 1)).T) + # generate matrix y_surr = tsmodel.uar1_sim(t = times, tau=tau, sigma_2=sigma_2) - - s_list = [] - for i, (t, y) in enumerate(zip(times.T,y_surr.T)): - ts = Series(time=t, value=y, - time_name=target_series.time_name, - time_unit=target_series.time_unit, - value_name=target_series.value_name, - value_unit=target_series.value_unit, - label = str(target_series.label or '') + " surr #" + str(i+1), - verbose=False, auto_time_params=False) - s_list.append(ts) + + if self.number > 1: + s_list = [] + for i, y in enumerate(y_surr.T): + ts = target_series.copy() # copy Series + ts.value = y # replace value with y_surr column + ts.label = str(target_series.label or '') + " surr #" + str(i+1) + s_list.append(ts) + else: + ts = target_series.copy() # copy Series + ts.value = y_surr # replace value with y_surr column + ts.label = str(target_series.label or '') + " surr" + s_list = [ts] self.series_list = s_list @@ -148,7 +152,7 @@ def from_params(self, params, length=50, time_pattern = 'even', seed=None, setti The pattern used to generate the surrogate time axes * 'even' uses an evenly-spaced time with spacing `delta_t` specified in settings (if not specified, defaults to 1.0) * 'random' uses random_time_index() with specified distribution and parameters - Relevant settings are `delta_t_dist` and `param`. (default: 'exponential' with parameter = 1) + Relevant settings are `delta_t_dist` and `param`. (default: 'exponential' with parameter = 1.0) * 'specified': uses time axis `time` from `settings`. seed : int @@ -201,20 +205,19 @@ def from_params(self, params, length=50, time_pattern = 'even', seed=None, setti times = np.tile(settings["time"], (self.number, 1)).T else: raise ValueError(f"Unknown time pattern: {time_pattern}") - # apply surrogate method y_surr = np.empty_like(times) - if self.method == 'uar1' or 'ar1sim': + if self.method in ['uar1','ar1sim']: # if nparams<2: warnings.warn(f'The AR(1) model needs 2 parameters, tau and sigma2 (in that order); {nparams} provided. default values used',UserWarning, stacklevel=2) params = [5,0.5] y_surr = tsmodel.uar1_sim(t = times, tau=params[0], sigma_2=params[1]) - elif self.method == 'phaseran': + elif self.method == 'phaseran': raise ValueError("Phase-randomization is only available in from_series().") - + # create the series_list s_list = [] for i, (t, y) in enumerate(zip(times.T,y_surr.T)): diff --git a/pyleoclim/tests/test_core_SurrogateSeries.py b/pyleoclim/tests/test_core_SurrogateSeries.py index 8df7de39..63e6170b 100644 --- a/pyleoclim/tests/test_core_SurrogateSeries.py +++ b/pyleoclim/tests/test_core_SurrogateSeries.py @@ -16,18 +16,18 @@ import pytest import pyleoclim as pyleo import pyleoclim.utils.tsmodel as tsmodel +from numpy.testing import assert_array_equal +from scipy import stats class TestUISurrogatesSeries: ''' Tests for pyleoclim.core.ui.SurrogateSeries ''' @pytest.mark.parametrize('method',['ar1sim','uar1']) - @pytest.mark.parametrize('params',[[5,1],[2,2]]) + @pytest.mark.parametrize('params',[[5,1],[2,2]]) # stacking pytests is legal! def test_surrogates_ar1(self, method, params, nsim=10, eps=0.3, seed = 108): ''' Generate AR(1) surrogates based on a AR(1) series with certain parameters, estimate the parameters from the surrogates series and verify accuracy ''' - #t, v = pyleo.utils.gen_ts(model='ar1') - #ts = pyleo.Series(time =t, value = v, verbose=False, auto_time_params=True) ar1 = pyleo.SurrogateSeries(method='ar1sim', number=nsim) ar1.from_params(params = params, seed=seed) @@ -37,48 +37,111 @@ def test_surrogates_ar1(self, method, params, nsim=10, eps=0.3, seed = 108): tau[i], sigma_2[i] = tsmodel.uar1_fit(ts.value, ts.time) assert np.abs(tau.mean()-params[0]) < eps assert np.abs(sigma_2.mean()-params[1]) < eps + + @pytest.mark.parametrize('method',['ar1sim','uar1','phaseran']) + @pytest.mark.parametrize('number',[1,5]) + def test_surrogates_match(self, method, number): + ''' Test that from_series() work with all methods, matches the original + time axis AND that number can be varied + ''' + t, v = pyleo.utils.gen_ts(model='colored_noise', nt=100) + ts = pyleo.Series(time = t, value = v, verbose=False, auto_time_params=True) + ar1 = pyleo.SurrogateSeries(method=method, number=number) + ar1.from_series(ts) + assert len(ar1.series_list) == number # test right number + for s in ar1.series_list: + assert_array_equal(s.time, ts.time) # test time axis match + + @pytest.mark.parametrize('delta_t',[1,3]) + @pytest.mark.parametrize('length',[10,20]) + def test_surrogates_from_params_even(self, delta_t, length, number=2): + ''' Test from_params() with even time axes with varying resolution + ''' + surr = pyleo.SurrogateSeries(method='ar1sim', number=number) + surr.from_params(params = [5,1], time_pattern='even', length= length, + settings={"delta_t" :delta_t}) + for ts in surr.series_list: + assert(np.allclose(tsmodel.inverse_cumsum(ts.time),delta_t)) + assert len(ts.time) == length - # @pytest.mark.parametrize('number',[1,5]) - # def test_surrogates_uar1_match(self, number): - # t, v = pyleo.utils.gen_ts(model='ar1') - # ts = pyleo.Series(time =t, value = v, verbose=False, auto_time_params=True) - # ar1 = pyleo.SurrogateSeries(method='ar1sim', number=number) - # ar1.from_series(ts) - # # generate surrogates - # surr = ts.surrogates(method = 'uar1', number = number, time_pattern ="match") - # for i in range(number): - # assert(np.allclose(surr.series_list[i].time, ts.time)) + @pytest.mark.parametrize('dist', ["exponential", "poisson"]) + def test_surrogates_random_time_t0(self, dist, number=2, tol = 3, param=[1]): + ''' Test from_params() with random time axes with two 1-parameter distributions + ''' + surr = pyleo.SurrogateSeries(method='ar1sim', number=number) + surr.from_params(params = [5,1], length=200, time_pattern='random', seed=108, + settings={"delta_t_dist" :dist ,"param":param}) + + if dist == "exponential": + scipy_dist = stats.expon + else: + scipy_dist = stats.poisson + + for i in range(number): + delta_t = tsmodel.inverse_cumsum(surr.series_list[i].time) + # Compute the empirical cumulative distribution function (CDF) of the generated data + empirical_cdf, bins = np.histogram(delta_t, bins=100, density=True) + empirical_cdf = np.cumsum(empirical_cdf) * np.diff(bins) + # Compute the theoretical CDF of the Exponential distribution + theoretical_cdf = scipy_dist.cdf(bins[1:],param[0]) + # Trim theoretical_cdf to match the size of empirical_cdf + theoretical_cdf = theoretical_cdf[:len(empirical_cdf)] + # Compute the L2 norm (Euclidean distance) between empirical and theoretical CDFs + l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf) + assert(l2_norm0) - - - @pytest.mark.parametrize(('p', 'evenly_spaced'), [(1, True), (10, True), (1, False), (10, False)]) -def test_uar1_fit(p, evenly_spaced): +def test_uar1_fit(p, evenly_spaced, tol = 0.45): ''' Tests whether this method works well on an AR(1) process with known parameters and evenly spaced time points ''' # define tolerance - tol = .4 tau = 2 sigma_2 = 1 n = 500 + np.random.seed(108) if evenly_spaced: t_arr = np.tile(range(1,n), (p, 1)).T @@ -62,7 +59,7 @@ def test_uar1_fit(p, evenly_spaced): theta_hat_matrix = np.empty((p, 2)) # estimate parameters for each time series for j in range(p): - ys = tsmodel.uar1_sim(t = t_arr[:, j], tau_0 = tau, sigma_2_0=sigma_2) + ys = tsmodel.uar1_sim(t = t_arr[:, j], tau = tau, sigma_2=sigma_2) theta_hat_matrix[j,:] = tsmodel.uar1_fit(ys, t_arr[:, j]) # compute mean of estimated param for each simulate ts theta_hat_bar = np.mean(theta_hat_matrix, axis=0) diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index 07d98caf..27fe171a 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -721,10 +721,10 @@ def uar1_sim(t, tau, sigma_2=1): # fill the array for j in range(p): # Note: this shouldn't work but it does! for i in range(1, n): - delta_i = t[i] - t[i-1] + delta_i = t[i,j] - t[i-1,j] phi = np.exp(-delta_i / tau) sigma_i = np.sqrt(sigma_2 * (1-phi**2)) - y[i] = phi * y[i-1] + sigma_i * z[i] + y[i,j] = phi * y[i-1,j] + sigma_i * z[i,j] y = np.squeeze(y) # squeeze superfluous dimensions return y @@ -732,7 +732,7 @@ def uar1_sim(t, tau, sigma_2=1): def inverse_cumsum(arr): return np.diff(np.concatenate(([0], arr))) -def random_time_index(n, delta_t_dist = "exponential", param = [1]): +def random_time_index(n, delta_t_dist = "exponential", param = [1.0]): ''' Generate a random time index vector according to a specific probability model From 527ba2d70b7b4c58b2fb94db8972c8c3d102b43f Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Tue, 16 Apr 2024 19:14:04 -0700 Subject: [PATCH 07/10] updated correlation tests and docstrings --- pyleoclim/core/series.py | 84 ++++++++++++++--------------- pyleoclim/tests/test_core_Series.py | 65 ++++++++++------------ 2 files changed, 71 insertions(+), 78 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 7dd38b67..6f8bb65b 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3349,9 +3349,10 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, return coh - def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = 'phaseran', timespan=None, - settings=None, common_time_kwargs=None, seed=None, mute_pbar=False): - ''' Estimates the correlation and its associated significance between two time series (not ncessarily IID). + def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = 'phaseran', + number=1000, timespan=None, settings=None, seed=None, + common_time_kwargs=None, mute_pbar=False): + ''' Estimates the correlation and its associated significance between two time series (not ncessarily IID) as per [1] The significance of the correlation is assessed using one of the following methods: @@ -3384,8 +3385,10 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = method : str, {'ttest','built-in','ar1sim','phaseran'} method for significance testing. Default is 'phaseran' - 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1] - 'built-in' uses the p-value that ships with the SciPy function. + * 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1] + * 'built-in' uses the p-value that ships with the SciPy function. + * 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) seires fitted to the originals + * 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2]) The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning. Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case. @@ -3394,12 +3397,9 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = settings : dict Parameters for the correlation function, including: - - nsim : int - the number of simulations (default: 1000) - surr_settings : dict - Parameters for surrogate generator. See individual methods for details. - + * nsim (number of simulations) + NB: this is superseded by `number` + common_time_kwargs : dict Parameters for the method `MultipleSeries.common_time()`. Will use interpolation by default. @@ -3439,11 +3439,15 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = References ---------- [1] Hu, J., J. Emile-Geay, and J. Partin (2017), Correlation-based interpretations of paleoclimate data – where statistics meet past climates, Earth and Planetary Science Letters, 459, 362–371, doi:10.1016/j.epsl.2016.11.048. - + + [2] Ebisuzaki, W. (1997), A method to estimate the statistical significance of a correla- tion when the data are serially correlated, Journal of Climate, 10(9), 2147–2153, doi:10.1175/1520- 0442(1997)010¡2147:AMTETS¿2.0.CO;2. + Examples -------- - Correlation between the Nino3.4 index and the Deasonalized All Indian Rainfall Index + Let us compute the correlation between the Nino3.4 index and the Deasonalized All Indian Rainfall Index. + For expendiency, we limit the Monte Carlo tests to 20 surrogates, which is not sufficient to obtain accurate p-values. + The default number, 1000, is more respectable, though to avoid p-hacking, we recommend bumping it even higher if at all possible. .. jupyter-execute:: @@ -3451,26 +3455,25 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = ts_air = pyleo.utils.load_dataset('AIR') ts_nino = pyleo.utils.load_dataset('NINO3') - # with `nsim=20` and default `method='phaseran'` - # set an arbitrary random seed to fix the result - corr_res = ts_nino.correlation(ts_air, settings={'nsim': 20}, seed=2333) - print(corr_res) - - # changing the statistic - corr_res = ts_nino.correlation(ts_air, statistic='kendalltau') + # with 20 surrogates and the default significance method, 'phaseran'` + corr_res = ts_nino.correlation(ts_air, number=20) print(corr_res) - # using a simple t-test with DOFs adjusted for autocorrelation - # set an arbitrary random seed to fix the result + # using a simple t-test with DOFs adjusted for autocorrelation (only with Person's r') corr_res = ts_nino.correlation(ts_air, method='ttest') print(corr_res) # using "isopersistent" surrogates (AR(1) simulation) - # set an arbitrary random seed to fix the result - corr_res = ts_nino.correlation(ts_air, method = 'ar1sim', settings={'nsim': 20}, seed=2333) + # and setting an arbitrary random seed to fix the result + corr_res = ts_nino.correlation(ts_air, method = 'ar1sim', number=20, seed=2333) + print(corr_res) + + # changing the statistic works with all surrogate-based significance methods (but not the 'ttest' method, which only applies to a 'pearsonr' statistic) + corr_res = ts_nino.correlation(ts_air, statistic='kendalltau', method = 'phaseran', number=20) print(corr_res) ''' + from ..core.surrogateseries import SurrogateSeries, supported_surrogates if method == 'isospectral': warnings.warn("isospectral is deprecated and was replaced by 'phaseran'", DeprecationWarning, stacklevel=2) @@ -3481,8 +3484,6 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = method = 'ar1sim' settings = {} if settings is None else settings.copy() - #corr_args = {'alpha': alpha, 'method': method} - #corr_args.update(settings) ms = MultipleSeries([self, target_series]) if list(self.time) != list(target_series.time): @@ -3502,8 +3503,12 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = np.random.seed(seed) if method == 'ttest': - stat, signf, pval = corrutils.corr_ttest(ts0.value, ts1.value, alpha=alpha) - signif = bool(signf) + if statistic == 'pearsonr': + stat, signf, pval = corrutils.corr_ttest(ts0.value, ts1.value, alpha=alpha) + signif = bool(signf) + else: + raise ValueError(f"The adjusted T-test only applies to Pearson's r; got {statistic}") + elif method == 'built-in': if statistic == 'weightedtau': raise ValueError('The null distribution of this statistic is unknown; please use a non-parametric method to obtain the p-value') @@ -3512,27 +3517,22 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = stat = res[0] pval = res.pvalue if len(res) > 1 else np.nan signif = pval <= alpha - elif method in supported_surrogates: + elif method in supported_surrogates: if 'nsim' in settings.keys(): number = settings['nsim'] - settings.pop('nsim') + #settings.pop('nsim') else: - number = 1000 + number = number - settings['seed'] = seed - settings['method'] = method - settings['number'] = number - #number = corr_args['nsim'] if 'nsim' in corr_args.keys() else 1000 - #seed = corr_args['seed'] if 'seed' in corr_args.keys() else None - #method = corr_args['method'] if 'method' in corr_args.keys() else None - #surr_settings = corr_args['surr_settings'] if 'surr_settings' in corr_args.keys() else None - # compute correlation statistic stat = corrutils.association(ts0.value,ts1.value,statistic)[0] - - ts0_surr = ts0.surrogates(**settings) - ts1_surr = ts1.surrogates(**settings) + + # establish significance against surrogates stat_surr = np.empty((number)) + ts0_surr = SurrogateSeries(method=method,number=number) + ts0_surr.from_series(ts0) + ts1_surr = SurrogateSeries(method=method,number=number) + ts1_surr.from_series(ts1) for i in tqdm(range(number), desc='Evaluating association on surrogate pairs', total=number, disable=mute_pbar): stat_surr[i] = corrutils.association(ts0_surr.series_list[i].value, ts1_surr.series_list[i].value,statistic)[0] diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index 7e1d940d..32a32ff5 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -25,14 +25,10 @@ import pathlib test_dirpath = pathlib.Path(__file__).parent.absolute() -#from urllib.request import urlopen import pyleoclim as pyleo -#import pyleoclim.utils.tsmodel as tsmodel import pyleoclim.utils.tsbase as tsbase -#from statsmodels.tsa.arima_process import arma_generate_sample -#from scipy.stats import expon # a collection of useful functions @@ -586,9 +582,10 @@ def test_summary_plot_t1(self, pinkseries): class TestUISeriesCorrelation: ''' Test Series.correlation() ''' - @pytest.mark.parametrize('corr_method', ['ttest','built-in','ar1sim','phaseran']) - def test_correlation_t0a(self, corr_method, eps=0.1): - ''' Test various correlation methods + @pytest.mark.parametrize('sig_method', ['ttest','built-in','ar1sim','phaseran']) + @pytest.mark.parametrize('number', [2,5]) + def test_correlation_t0a(self, sig_method, number, eps=0.1): + ''' Test the various significance methods ''' nt = 100 rho = 0.4 # target correlation @@ -597,11 +594,11 @@ def test_correlation_t0a(self, corr_method, eps=0.1): v = rho*ts1.value + np.sqrt(1-rho**2)*np.random.normal(loc=0, scale=1, size=nt) ts2 = pyleo.Series(time=ts1.time, value=v, verbose=False, auto_time_params=True) - corr_res = ts1.correlation(ts2, method= corr_method) + corr_res = ts1.correlation(ts2, method= sig_method, number=number) assert np.abs(rho-corr_res.r) < eps - @pytest.mark.parametrize('corr_method', ['isopersistent', 'isospectral']) - def test_correlation_t0b(self, corr_method,eps=0.1): + @pytest.mark.parametrize('sig_method', ['isopersistent', 'isospectral']) + def test_correlation_t0b(self, sig_method,eps=0.1): ''' Test that deprecated method names get a proper warning ''' nt = 100 @@ -610,28 +607,11 @@ def test_correlation_t0b(self, corr_method,eps=0.1): v = rho*ts1.value + np.sqrt(1-rho**2)*np.random.normal(loc=0, scale=1, size=nt) ts2 = pyleo.Series(time=ts1.time, value=v, verbose=False, auto_time_params=True) with pytest.deprecated_call(): - corr_res = ts1.correlation(ts2, method= corr_method) + corr_res = ts1.correlation(ts2, method= sig_method, number=2) assert np.abs(rho-corr_res.r) < eps - # @pytest.mark.parametrize('corr_method', ['ttest', 'isopersistent', 'isospectral']) - # def test_correlation_t1(self, corr_method, eps=1): - # ''' Generate two colored noise series calculate their correlation - # ''' - # alpha = 1 - # nt = 1000 - # ts = gen_ts(nt=nt,alpha=alpha) - # v1 = ts.value + np.random.normal(loc=0, scale=1, size=nt) - # v2 = ts.value + np.random.normal(loc=0, scale=2, size=nt) - - # ts1 = pyleo.Series(time=ts.time, value=v1) - # ts2 = pyleo.Series(time=ts.time, value=v2) - - # corr_res = ts1.correlation(ts2, settings={'method': corr_method}) - # r = corr_res.r - # assert np.abs(r-0) < eps - - @pytest.mark.parametrize('corr_method', ['ttest','built-in','ar1sim','phaseran']) - def test_correlation_t2(self, corr_method, eps=0.5): + @pytest.mark.parametrize('sig_method', ['ttest','built-in','ar1sim','phaseran']) + def test_correlation_t1(self, sig_method, eps=0.5): ''' Test correlation between two series with inconsistent time axis ''' @@ -651,18 +631,18 @@ def test_correlation_t2(self, corr_method, eps=0.5): ts1_evenly = pyleo.Series(time=t, value=air) ts2_evenly = pyleo.Series(time=t, value=nino) - corr_res_evenly = ts1_evenly.correlation(ts2_evenly, method=corr_method) + corr_res_evenly = ts1_evenly.correlation(ts2_evenly, method=sig_method,number=2) r_evenly = corr_res_evenly.r - ts1 = pyleo.Series(time=air_time_unevenly, value=air_value_unevenly) - ts2 = pyleo.Series(time=nino_time_unevenly, value=nino_value_unevenly) + ts1 = pyleo.Series(time=air_time_unevenly, value=air_value_unevenly, verbose=False, auto_time_params=True) + ts2 = pyleo.Series(time=nino_time_unevenly, value=nino_value_unevenly, verbose=False, auto_time_params=True) - corr_res = ts1.correlation(ts2, method=corr_method, common_time_kwargs={'method': 'interp'}) + corr_res = ts1.correlation(ts2, method=sig_method, number=2, common_time_kwargs={'method': 'interp'}) r = corr_res.r assert np.abs(r-r_evenly) < eps @pytest.mark.parametrize('stat', ['linregress','pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau']) - def test_correlation_t3(self, stat, eps=0.2): + def test_correlation_t2(self, stat, eps=0.2): ''' Test that various statistics can be used https://docs.scipy.org/doc/scipy/reference/stats.html#association-correlation-tests ''' @@ -674,11 +654,24 @@ def test_correlation_t3(self, stat, eps=0.2): ts2 = pyleo.Series(time=ts1.time, value=v, verbose=False, auto_time_params=True) if stat == 'weightedtau': - corr_res = ts1.correlation(ts2, statistic=stat,settings={'nsim':20}) + corr_res = ts1.correlation(ts2, statistic=stat,number = 2) else: corr_res = ts1.correlation(ts2, statistic=stat, method='built-in') assert np.abs(rho-corr_res.r) < eps + def test_correlation_t3(self, eps=0.1, nt=10): + ''' Test that t-test screams at the user if a non-pearson statistic is requested + ''' + rho = 0.4 # target correlation + ts1 = gen_ts(nt=nt, alpha=1,seed=333).standardize() + # generate series whose correlation with ts1 should be close to rho: + v = rho*ts1.value + np.sqrt(1-rho**2)*np.random.normal(loc=0, scale=1, size=nt) + ts2 = pyleo.Series(time=ts1.time, value=v, verbose=False, auto_time_params=True) + with pytest.raises(ValueError): + corr_res = ts1.correlation(ts2, statistic='kendalltau', method= 'ttest') + + + class TestUISeriesCausality: ''' Test Series.causality() From f221b1370c8984ff3e867ed454cbda27488e55bb Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Wed, 17 Apr 2024 22:06:34 -0700 Subject: [PATCH 08/10] correlation settings and SurrogateSeries param assignment --- pyleoclim/core/series.py | 23 ++++++---- pyleoclim/core/surrogateseries.py | 44 +++++++++++++------- pyleoclim/tests/test_core_SurrogateSeries.py | 32 +++++++------- pyleoclim/utils/correlation.py | 1 + 4 files changed, 60 insertions(+), 40 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 6f8bb65b..4bde3de3 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3396,9 +3396,7 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = The time interval over which to perform the calculation settings : dict - Parameters for the correlation function, including: - * nsim (number of simulations) - NB: this is superseded by `number` + optional parameters for the measures of association. See https://docs.scipy.org/doc/scipy/reference/stats.html#association-correlation-tests common_time_kwargs : dict Parameters for the method `MultipleSeries.common_time()`. Will use interpolation by default. @@ -3469,8 +3467,14 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = print(corr_res) # changing the statistic works with all surrogate-based significance methods (but not the 'ttest' method, which only applies to a 'pearsonr' statistic) - corr_res = ts_nino.correlation(ts_air, statistic='kendalltau', method = 'phaseran', number=20) + corr_res = ts_nino.correlation(ts_air, statistic='spearmanr', method = 'phaseran', number=20) print(corr_res) + + # To use the built-in signficance test: + corr_res2s = ts_nino.correlation(ts_air, statistic='spearmanr', method = 'built-in') + # To modify the method, use `settings`. For instance, to specify a 1-sided test instead of a 2-sided one for Spearman's R: + corr_res1s = ts_nino.correlation(ts_air, statistic='spearmanr', method = 'built-in', settings={'alternative':'less'}) + print(corr_res1s.p-corr_res2s.p) # shows the difference in p-values ''' from ..core.surrogateseries import SurrogateSeries, supported_surrogates @@ -3513,19 +3517,20 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = if statistic == 'weightedtau': raise ValueError('The null distribution of this statistic is unknown; please use a non-parametric method to obtain the p-value') else: - res = corrutils.association(ts0.value,ts1.value,statistic) + res = corrutils.association(ts0.value,ts1.value,statistic, settings) stat = res[0] pval = res.pvalue if len(res) > 1 else np.nan signif = pval <= alpha elif method in supported_surrogates: - if 'nsim' in settings.keys(): + if 'nsim' in settings.keys(): # for legacy reasons + raise DeprecationWarning("The number of simulations is now governed by the parameter `number`. nsim will be removed in an upcoming release") number = settings['nsim'] - #settings.pop('nsim') + settings.pop('nsim') else: number = number # compute correlation statistic - stat = corrutils.association(ts0.value,ts1.value,statistic)[0] + stat = corrutils.association(ts0.value,ts1.value,statistic,settings)[0] # establish significance against surrogates stat_surr = np.empty((number)) @@ -3535,7 +3540,7 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = ts1_surr.from_series(ts1) for i in tqdm(range(number), desc='Evaluating association on surrogate pairs', total=number, disable=mute_pbar): stat_surr[i] = corrutils.association(ts0_surr.series_list[i].value, - ts1_surr.series_list[i].value,statistic)[0] + ts1_surr.series_list[i].value,statistic, settings)[0] # obtain p-value pval = np.sum(np.abs(stat_surr) >= np.abs(stat)) / number # establish significance diff --git a/pyleoclim/core/surrogateseries.py b/pyleoclim/core/surrogateseries.py index 89997d7e..5d0c7027 100644 --- a/pyleoclim/core/surrogateseries.py +++ b/pyleoclim/core/surrogateseries.py @@ -15,12 +15,12 @@ class SurrogateSeries(EnsembleSeries): ''' Object containing surrogate timeseries, obtained either by emulating an - existing series (see `from_series()`) or from a parametric model (see `from_params()`) + existing series (see `from_series()`) or from a parametric model (see `from_param()`) Surrogate Series is a child of EnsembleSeries. All methods available for EnsembleSeries are available for surrogate series. ''' - def __init__(self, series_list=[], number=1, method=None, label=None): + def __init__(self, series_list=[], number=1, method=None, label=None, param=[]): ''' Initiatlize a SurrogateSeries object. The parameters below are then used for series generation and labeling. @@ -30,10 +30,13 @@ def __init__(self, series_list=[], number=1, method=None, label=None): a list of pyleoclim.Series objects. The default is []. number : int - The number of surrogates to generate. THe default is 1. + The number of surrogates to generate. The default is 1. method : str {ar1sim, phaseran, uar1} - The name of the method used to generate surrogates of the timeseries + The name of the method used to generate surrogates of the timeseries (no default) + + param : list + a list of parameters for the method in question. Ignored if the method does not require parameters. label : str label of the collection of timeseries (e.g. 'SOI surrogates [AR(1) MLE]') @@ -47,8 +50,18 @@ def __init__(self, series_list=[], number=1, method=None, label=None): ''' self.series_list = series_list self.method = method + self.param = param self.number = number + if param is not None: + nparam = len(param) + if 'ar1' in method and nparam !=2: + raise ValueError("2 parameters are needed for this model") + elif method == 'phaseran': + warnings.warn("Phase-randomization is a non parametric method; the provided parameters will be ignored") + elif method in ['fBm','CN'] and nparam >1: + raise ValueError(f"1 parameter is needed for this model; only the first of the provided {nparam} will be used") + # refine the display name if label is None: if method == 'ar1sim': @@ -116,6 +129,7 @@ def from_series(self, target_series, seed=None): elif self.method == 'uar1': # estimate theta with MLE tau, sigma_2 = tsmodel.uar1_fit(target_series.value, target_series.time) + self.param = [tau, sigma_2] # assign parameters for future use # generate time axes according to provided pattern times = np.squeeze(np.tile(target_series.time, (self.number, 1)).T) # generate matrix @@ -136,13 +150,13 @@ def from_series(self, target_series, seed=None): self.series_list = s_list - def from_params(self, params, length=50, time_pattern = 'even', seed=None, settings=None): + def from_param(self, param, length=50, time_pattern = 'even', seed=None, settings=None): ''' - Simulate the SurrogateSeries from a given probability model + Simulate the SurrogateSeries from a parametric model Parameters ---------- - params : list + param : list model parameters (e.g. [tau, sigma0] for an AR(1) model) length : int @@ -175,12 +189,12 @@ def from_params(self, params, length=50, time_pattern = 'even', seed=None, setti Examples -------- ar1 = pyleo.SurrogateSeries(method='ar1sim', number=10) - ar1.from_params(length=100, params = [2,2]) + ar1.from_param(length=100, param = [2,2]) ar1.plot_envelope(title=rf'AR(1) synthetic series ($\tau={2},\sigma^2={2}$)') ''' - params = list(params) if params is not None else [] # coerce params into a list, no matter the original format - nparams = len(params) + param = list(param) if param is not None else [] # coerce param into a list, no matter the original format + nparam = len(param) settings = {} if settings is None else settings.copy() @@ -210,10 +224,10 @@ def from_params(self, params, length=50, time_pattern = 'even', seed=None, setti y_surr = np.empty_like(times) if self.method in ['uar1','ar1sim']: # - if nparams<2: - warnings.warn(f'The AR(1) model needs 2 parameters, tau and sigma2 (in that order); {nparams} provided. default values used',UserWarning, stacklevel=2) - params = [5,0.5] - y_surr = tsmodel.uar1_sim(t = times, tau=params[0], sigma_2=params[1]) + if nparam<2: + warnings.warn(f'The AR(1) model needs 2 parameters, tau and sigma2 (in that order); {nparam} provided. default values used',UserWarning, stacklevel=2) + param = [5,0.5] + y_surr = tsmodel.uar1_sim(t = times, tau=param[0], sigma_2=param[1]) elif self.method == 'phaseran': raise ValueError("Phase-randomization is only available in from_series().") @@ -223,7 +237,7 @@ def from_params(self, params, length=50, time_pattern = 'even', seed=None, setti for i, (t, y) in enumerate(zip(times.T,y_surr.T)): ts = Series(time=t, value=y, label = str(self.label or '') + " surr #" + str(i+1), - verbose=False, auto_time_params=True) + verbose=False, auto_time_param=True) s_list.append(ts) self.series_list = s_list diff --git a/pyleoclim/tests/test_core_SurrogateSeries.py b/pyleoclim/tests/test_core_SurrogateSeries.py index 63e6170b..6ce6f574 100644 --- a/pyleoclim/tests/test_core_SurrogateSeries.py +++ b/pyleoclim/tests/test_core_SurrogateSeries.py @@ -23,20 +23,20 @@ class TestUISurrogatesSeries: ''' Tests for pyleoclim.core.ui.SurrogateSeries ''' @pytest.mark.parametrize('method',['ar1sim','uar1']) - @pytest.mark.parametrize('params',[[5,1],[2,2]]) # stacking pytests is legal! - def test_surrogates_ar1(self, method, params, nsim=10, eps=0.3, seed = 108): + @pytest.mark.parametrize('param',[[5,1],[2,2]]) # stacking pytests is legal! + def test_surrogates_ar1(self, method, param, nsim=10, eps=0.3, seed = 108): ''' Generate AR(1) surrogates based on a AR(1) series with certain parameters, estimate the parameters from the surrogates series and verify accuracy ''' ar1 = pyleo.SurrogateSeries(method='ar1sim', number=nsim) - ar1.from_params(params = params, seed=seed) + ar1.from_param(param = param, seed=seed) tau = np.empty((nsim)) sigma_2 = np.empty_like(tau) for i, ts in enumerate(ar1.series_list): tau[i], sigma_2[i] = tsmodel.uar1_fit(ts.value, ts.time) - assert np.abs(tau.mean()-params[0]) < eps - assert np.abs(sigma_2.mean()-params[1]) < eps + assert np.abs(tau.mean()-param[0]) < eps + assert np.abs(sigma_2.mean()-param[1]) < eps @pytest.mark.parametrize('method',['ar1sim','uar1','phaseran']) @pytest.mark.parametrize('number',[1,5]) @@ -45,7 +45,7 @@ def test_surrogates_match(self, method, number): time axis AND that number can be varied ''' t, v = pyleo.utils.gen_ts(model='colored_noise', nt=100) - ts = pyleo.Series(time = t, value = v, verbose=False, auto_time_params=True) + ts = pyleo.Series(time = t, value = v, verbose=False, auto_time_param=True) ar1 = pyleo.SurrogateSeries(method=method, number=number) ar1.from_series(ts) assert len(ar1.series_list) == number # test right number @@ -54,11 +54,11 @@ def test_surrogates_match(self, method, number): @pytest.mark.parametrize('delta_t',[1,3]) @pytest.mark.parametrize('length',[10,20]) - def test_surrogates_from_params_even(self, delta_t, length, number=2): - ''' Test from_params() with even time axes with varying resolution + def test_surrogates_from_param_even(self, delta_t, length, number=2): + ''' Test from_param() with even time axes with varying resolution ''' surr = pyleo.SurrogateSeries(method='ar1sim', number=number) - surr.from_params(params = [5,1], time_pattern='even', length= length, + surr.from_param(param = [5,1], time_pattern='even', length= length, settings={"delta_t" :delta_t}) for ts in surr.series_list: assert(np.allclose(tsmodel.inverse_cumsum(ts.time),delta_t)) @@ -66,10 +66,10 @@ def test_surrogates_from_params_even(self, delta_t, length, number=2): @pytest.mark.parametrize('dist', ["exponential", "poisson"]) def test_surrogates_random_time_t0(self, dist, number=2, tol = 3, param=[1]): - ''' Test from_params() with random time axes with two 1-parameter distributions + ''' Test from_param() with random time axes with two 1-parameter distributions ''' surr = pyleo.SurrogateSeries(method='ar1sim', number=number) - surr.from_params(params = [5,1], length=200, time_pattern='random', seed=108, + surr.from_param(param = [5,1], length=200, time_pattern='random', seed=108, settings={"delta_t_dist" :dist ,"param":param}) if dist == "exponential": @@ -94,7 +94,7 @@ def test_surrogates_random_time_t1(self, number=2, tol = 3, param=[4.2,2.5]): ''' Test random time axes with Pareto distribution ''' surr = pyleo.SurrogateSeries(method='ar1sim', number=number) - surr.from_params(params = [5,1], time_pattern='random', seed=108, + surr.from_param(param = [5,1], time_pattern='random', seed=108, settings={"delta_t_dist" :"pareto" ,"param":param}) for i in range(number): @@ -114,7 +114,7 @@ def test_surrogates_random_time_t2(self, number=2, tol = 3, param=[[1,2],[.80,.2 ''' Test AR(1) model w/ random time axes (uniform choice) ''' surr = pyleo.SurrogateSeries(method='ar1sim', number=number) - surr.from_params(params = [5,1], time_pattern='random', seed=108, + surr.from_param(param = [5,1], time_pattern='random', seed=108, settings={"delta_t_dist" :"random_choice" ,"param":param}) for ts in surr.series_list: @@ -127,18 +127,18 @@ def test_surrogates_specified_time(self, length, number=2): ''' ti = tsmodel.random_time_index(n=length) surr = pyleo.SurrogateSeries(method='ar1sim', number=number) - surr.from_params(params = [5,1], time_pattern='specified', seed=108, + surr.from_param(param = [5,1], time_pattern='specified', seed=108, settings={"time":ti}) for ts in surr.series_list: assert_array_equal(ts.time, ti) # test time axis match def test_surrogates_forbidden(self, number=1): - ''' Test that phase randomization is rejected in from_params() + ''' Test that phase randomization is rejected in from_param() ''' surr = pyleo.SurrogateSeries(method='phaseran', number=number) with pytest.raises(ValueError): - surr.from_params(params = [5,1]) # check that this returns an error + surr.from_param(param = [5,1]) # check that this returns an error diff --git a/pyleoclim/utils/correlation.py b/pyleoclim/utils/correlation.py index cdf045f4..9fba5874 100644 --- a/pyleoclim/utils/correlation.py +++ b/pyleoclim/utils/correlation.py @@ -709,6 +709,7 @@ def association(y1, y2, statistic='pearsonr',settings=None): acceptable_methods = ['linregress','pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau'] if statistic in acceptable_methods: func = getattr(stats, statistic) + print(args) res = func(y1,y2,**args) else: raise ValueError(f'Wrong statistic: {statistic}; acceptable choices are {acceptable_methods}') From 2dcf6689d66f0706a145edd50309cf7e37949817 Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Fri, 19 Apr 2024 16:23:19 -0700 Subject: [PATCH 09/10] enable PSD, Scalograms, Coherence and fix minor bugs --- pyleoclim/core/coherence.py | 19 ++++------- pyleoclim/core/psds.py | 34 +++----------------- pyleoclim/core/scalograms.py | 10 +++--- pyleoclim/core/series.py | 4 +-- pyleoclim/core/surrogateseries.py | 19 +++++------ pyleoclim/tests/test_core_PSD.py | 16 +++------ pyleoclim/tests/test_core_SurrogateSeries.py | 26 +++++++-------- 7 files changed, 46 insertions(+), 82 deletions(-) diff --git a/pyleoclim/core/coherence.py b/pyleoclim/core/coherence.py index 71bd079d..fa41041b 100644 --- a/pyleoclim/core/coherence.py +++ b/pyleoclim/core/coherence.py @@ -692,17 +692,17 @@ def signif_test(self, number=200, method='ar1sim', seed=None, qs=[0.95], setting coh.signif_test(method='phaseran').plot() ''' - + from ..core.surrogateseries import SurrogateSeries + if number == 0: return self new = self.copy() - surr1 = self.timeseries1.surrogates( - number=number, seed=seed, method=method, settings=settings - ) - surr2 = self.timeseries2.surrogates( - number=number, seed=seed, method=method, settings=settings - ) + + surr1 = SurrogateSeries(method=method,number=number, seed=seed) + surr1.from_series(self.timeseries1) + surr2 = SurrogateSeries(method=method,number=number, seed=seed) + surr2.from_series(self.timeseries2) # adjust time axis @@ -730,11 +730,6 @@ def signif_test(self, number=200, method='ar1sim', seed=None, qs=[0.95], setting wtc_qs = np.ndarray(shape=(nq, nf, nt)) xwt_qs = np.empty_like(wtc_qs) - # for i in range(nf): - # for j in range(nt): - # wtc_qs[:,i,j] = mquantiles(wtcs[:,i,j], qs) - # xwt_qs[:,i,j] = mquantiles(xwts[:,i,j], qs) - # extract quantiles and reshape wtc_qs = mquantiles(wtcs_r, qs, axis=0) wtc_qs = np.reshape(wtc_qs, (nq, nf, nt)) diff --git a/pyleoclim/core/psds.py b/pyleoclim/core/psds.py index c572bedd..2dcf217b 100644 --- a/pyleoclim/core/psds.py +++ b/pyleoclim/core/psds.py @@ -1,10 +1,6 @@ from ..utils import plotting from ..utils import wavelet as waveutils from ..utils import spectral as specutils -#from ..utils import tsbase - -#from ..core.multiplepsd import * -#import ..core.multiplepsd import matplotlib.pyplot as plt @@ -16,28 +12,6 @@ from matplotlib.ticker import ScalarFormatter, FormatStrFormatter - -# def infer_period_unit_from_time_unit(time_unit): -# ''' infer a period unit based on the given time unit - -# ''' -# if time_unit is None: -# period_unit = None -# else: -# unit_group = lipdutils.timeUnitsCheck(time_unit) -# if unit_group != 'unknown': -# if unit_group == 'kage_units': -# period_unit = 'kyrs' -# else: -# period_unit = 'yrs' -# else: -# if time_unit[-1] == 's': -# period_unit = time_unit -# else: -# period_unit = f'{time_unit}s' - -# return period_unit - def infer_period_unit_from_time_unit(time_unit): ''' infer a period unit based on the given time unit @@ -273,6 +247,7 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], pyleoclim.core.series.Series.wavelet : Performs wavelet analysis on Pyleoclim Series ''' + from ..core.surrogateseries import SurrogateSeries if self.spec_method == 'wwz' and method == 'ar1asym': raise ValueError('Asymptotic solution is not supported for the wwz method') @@ -300,10 +275,9 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], return self new = self.copy() - surr = self.timeseries.surrogates( - number=number, seed=seed, method=method, settings=settings - ) - + surr = SurrogateSeries(method=method,number=number, seed=seed) + surr.from_series(self.timeseries) + if signif_scals: surr_psd = surr.spectral( method=self.spec_method, settings=self.spec_args, scalogram_list=signif_scals diff --git a/pyleoclim/core/scalograms.py b/pyleoclim/core/scalograms.py index 0661ad34..2ccd1305 100644 --- a/pyleoclim/core/scalograms.py +++ b/pyleoclim/core/scalograms.py @@ -482,6 +482,7 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], scalogram = ts.wavelet().signif_test(number=2, export_scal=True) ''' + if self.wave_method == 'wwz' and method == 'ar1asym': raise ValueError('Asymptotic solution is not supported for the wwz method') @@ -490,6 +491,7 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], raise ValueError(f"The available methods are: {acceptable_methods}") if method in ['ar1sim','uar1'] : + from ..core.surrogateseries import SurrogateSeries if hasattr(self,'signif_scals'): signif_scals = self.signif_scals @@ -517,14 +519,14 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], number -= len(scalogram_list) surr_scal_tmp = [] surr_scal_tmp.extend(scalogram_list) - surr = self.timeseries.surrogates(number=number, seed=seed, - method=method, settings=settings) + surr = SurrogateSeries(method=method,number=number, seed=seed) + surr.from_series(self.timeseries) surr_scal_tmp.extend(surr.wavelet(method=self.wave_method, settings=self.wave_args).scalogram_list) surr_scal = MultipleScalogram(scalogram_list=surr_scal_tmp) else: - surr = self.timeseries.surrogates(number=number, seed=seed, - method=method, settings=settings) + surr = SurrogateSeries(method=method,number=number, seed=seed) + surr.from_series(self.timeseries) surr_scal = surr.wavelet(method=self.wave_method, settings=self.wave_args) if type(qs) is not list: diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 4bde3de3..b5cb9d5f 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3479,11 +3479,11 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = ''' from ..core.surrogateseries import SurrogateSeries, supported_surrogates if method == 'isospectral': - warnings.warn("isospectral is deprecated and was replaced by 'phaseran'", + warnings.warn("isospectral is now 'phaseran'", DeprecationWarning, stacklevel=2) method = 'phaseran' elif method == 'isopersistent': - warnings.warn("isopersistent is deprecated and was replaced by 'ar1sim'", + warnings.warn("isopersistent is now 'ar1sim'", DeprecationWarning, stacklevel=2) method = 'ar1sim' diff --git a/pyleoclim/core/surrogateseries.py b/pyleoclim/core/surrogateseries.py index 5d0c7027..a6821baf 100644 --- a/pyleoclim/core/surrogateseries.py +++ b/pyleoclim/core/surrogateseries.py @@ -20,7 +20,7 @@ class SurrogateSeries(EnsembleSeries): Surrogate Series is a child of EnsembleSeries. All methods available for EnsembleSeries are available for surrogate series. ''' - def __init__(self, series_list=[], number=1, method=None, label=None, param=[]): + def __init__(self, series_list=[], number=1, method=None, label=None, param=None, seed=None): ''' Initiatlize a SurrogateSeries object. The parameters below are then used for series generation and labeling. @@ -52,6 +52,7 @@ def __init__(self, series_list=[], number=1, method=None, label=None, param=[]): self.method = method self.param = param self.number = number + self.seed = seed if param is not None: nparam = len(param) @@ -73,7 +74,7 @@ def __init__(self, series_list=[], number=1, method=None, label=None, param=[]): else: raise ValueError(f"Unknown method: {self.method}. Please use one of {supported_surrogates}") - def from_series(self, target_series, seed=None): + def from_series(self, target_series): ''' Fashion the SurrogateSeries object after a target series @@ -113,8 +114,8 @@ def from_series(self, target_series, seed=None): if not isinstance(target_series, Series): raise TypeError(f"Expected pyleo.Series, got: {type(target_series)}") - if seed is not None: - np.random.seed(seed) + if self.seed is not None: + np.random.default_rng(self.seed) # apply surrogate method if self.method == 'ar1sim': @@ -150,7 +151,7 @@ def from_series(self, target_series, seed=None): self.series_list = s_list - def from_param(self, param, length=50, time_pattern = 'even', seed=None, settings=None): + def from_param(self, param, length=50, time_pattern = 'even', settings=None): ''' Simulate the SurrogateSeries from a parametric model @@ -198,8 +199,8 @@ def from_param(self, param, length=50, time_pattern = 'even', seed=None, setting settings = {} if settings is None else settings.copy() - if seed is not None: - np.random.seed(seed) + if self.seed is not None: + np.random.default_rng(self.seed) # generate time axes according to provided pattern if time_pattern == "even": @@ -225,7 +226,7 @@ def from_param(self, param, length=50, time_pattern = 'even', seed=None, setting if self.method in ['uar1','ar1sim']: # if nparam<2: - warnings.warn(f'The AR(1) model needs 2 parameters, tau and sigma2 (in that order); {nparam} provided. default values used',UserWarning, stacklevel=2) + warnings.warn(f'The AR(1) model needs 2 parameters, tau and sigma2 (in that order); {nparam} provided. default values used, tau=5, sigma=0.5',UserWarning, stacklevel=2) param = [5,0.5] y_surr = tsmodel.uar1_sim(t = times, tau=param[0], sigma_2=param[1]) @@ -237,7 +238,7 @@ def from_param(self, param, length=50, time_pattern = 'even', seed=None, setting for i, (t, y) in enumerate(zip(times.T,y_surr.T)): ts = Series(time=t, value=y, label = str(self.label or '') + " surr #" + str(i+1), - verbose=False, auto_time_param=True) + verbose=False, auto_time_params=True) s_list.append(ts) self.series_list = s_list diff --git a/pyleoclim/tests/test_core_PSD.py b/pyleoclim/tests/test_core_PSD.py index 8cc4cd64..3f2965d2 100644 --- a/pyleoclim/tests/test_core_PSD.py +++ b/pyleoclim/tests/test_core_PSD.py @@ -15,23 +15,15 @@ import pytest import pyleoclim as pyleo -def gen_ts(model='colored_noise',alpha=1, nt=100, f0=None, m=None, seed=None): - 'wrapper for gen_ts in pyleoclim' - - t,v = pyleo.utils.gen_ts(model=model,alpha=alpha, nt=nt, f0=f0, m=m, seed=seed) - ts=pyleo.Series(t,v, auto_time_params=True,verbose=False) - return ts - - # Tests below class TestUiPsdPlot: ''' Tests for PSD.plot() ''' - def test_plot_t0(self): + def test_plot_t0(self, gen_ts): ''' Test PSD.plot() with default parameters ''' - ts = gen_ts(nt=500) + ts = gen_ts psd = ts.spectral(method='mtm') fig, ax = psd.plot() pyleo.closefig(fig) @@ -40,10 +32,10 @@ class TestUiPsdSignifTest: ''' Tests for PSD.signif_test() ''' @pytest.mark.parametrize('method',['ar1sim','uar1','ar1asym']) - def test_signif_test_t0(self,method): + def test_signif_test_t0(self,method,gen_ts): ''' Test PSD.signif_test() with default parameters ''' - ts = gen_ts(nt=500) + ts = gen_ts psd = ts.spectral(method='mtm') psd_signif = psd.signif_test(number=10,method=method) fig, ax = psd_signif.plot() diff --git a/pyleoclim/tests/test_core_SurrogateSeries.py b/pyleoclim/tests/test_core_SurrogateSeries.py index 6ce6f574..6cf851c6 100644 --- a/pyleoclim/tests/test_core_SurrogateSeries.py +++ b/pyleoclim/tests/test_core_SurrogateSeries.py @@ -24,13 +24,13 @@ class TestUISurrogatesSeries: ''' @pytest.mark.parametrize('method',['ar1sim','uar1']) @pytest.mark.parametrize('param',[[5,1],[2,2]]) # stacking pytests is legal! - def test_surrogates_ar1(self, method, param, nsim=10, eps=0.3, seed = 108): + def test_surrogates_ar1(self, method, param, nsim=10, eps=1, seed = 108): ''' Generate AR(1) surrogates based on a AR(1) series with certain parameters, estimate the parameters from the surrogates series and verify accuracy ''' - ar1 = pyleo.SurrogateSeries(method='ar1sim', number=nsim) - ar1.from_param(param = param, seed=seed) + ar1 = pyleo.SurrogateSeries(method='ar1sim', number=nsim, seed=seed) + ar1.from_param(param = param) tau = np.empty((nsim)) sigma_2 = np.empty_like(tau) for i, ts in enumerate(ar1.series_list): @@ -45,7 +45,7 @@ def test_surrogates_match(self, method, number): time axis AND that number can be varied ''' t, v = pyleo.utils.gen_ts(model='colored_noise', nt=100) - ts = pyleo.Series(time = t, value = v, verbose=False, auto_time_param=True) + ts = pyleo.Series(time = t, value = v, verbose=False, auto_time_params=True) ar1 = pyleo.SurrogateSeries(method=method, number=number) ar1.from_series(ts) assert len(ar1.series_list) == number # test right number @@ -68,8 +68,8 @@ def test_surrogates_from_param_even(self, delta_t, length, number=2): def test_surrogates_random_time_t0(self, dist, number=2, tol = 3, param=[1]): ''' Test from_param() with random time axes with two 1-parameter distributions ''' - surr = pyleo.SurrogateSeries(method='ar1sim', number=number) - surr.from_param(param = [5,1], length=200, time_pattern='random', seed=108, + surr = pyleo.SurrogateSeries(method='ar1sim', number=number, seed=108) + surr.from_param(param = [5,1], length=200, time_pattern='random', settings={"delta_t_dist" :dist ,"param":param}) if dist == "exponential": @@ -90,11 +90,11 @@ def test_surrogates_random_time_t0(self, dist, number=2, tol = 3, param=[1]): l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf) assert(l2_norm Date: Fri, 19 Apr 2024 16:31:12 -0700 Subject: [PATCH 10/10] back to np.random.seed() --- pyleoclim/core/surrogateseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyleoclim/core/surrogateseries.py b/pyleoclim/core/surrogateseries.py index a6821baf..abc66ede 100644 --- a/pyleoclim/core/surrogateseries.py +++ b/pyleoclim/core/surrogateseries.py @@ -200,7 +200,7 @@ def from_param(self, param, length=50, time_pattern = 'even', settings=None): settings = {} if settings is None else settings.copy() if self.seed is not None: - np.random.default_rng(self.seed) + np.random.seed(self.seed) # generate time axes according to provided pattern if time_pattern == "even":