From 40e86f5b71554b11dd4eba22c1977cc93754dc0c Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 11 Nov 2019 20:07:03 +0000 Subject: [PATCH] ENH: Enforce type when creating bootstraps Raise if inputs are not ndarray, DataFrame or Series closes #315 --- arch/bootstrap/base.py | 32 ++++++++++++++++++-------- arch/tests/bootstrap/test_bootstrap.py | 10 ++++++++ arch/utility/exceptions.py | 11 +++++++++ doc/source/changes/4.0.txt | 1 + 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/arch/bootstrap/base.py b/arch/bootstrap/base.py index 76cc227ac1..788c7ba191 100644 --- a/arch/bootstrap/base.py +++ b/arch/bootstrap/base.py @@ -8,7 +8,8 @@ import scipy.stats as stats from arch.utility.array import DocStringInheritor -from arch.utility.exceptions import StudentizationError, studentization_error +from arch.utility.exceptions import StudentizationError, studentization_error,\ + arg_type_error, kwarg_type_error __all__ = ['IIDBootstrap', 'StationaryBootstrap', 'CircularBlockBootstrap', 'MovingBlockBootstrap', 'IndependentSamplesBootstrap'] @@ -181,21 +182,23 @@ class IIDBootstrap(object, metaclass=DocStringInheritor): arch.bootstrap.IndependentSamplesBootstrap """ + _name = 'IID Bootstrap' _common_size_required = True def __init__(self, *args, **kwargs): self._random_state = None - self._args = args + self._args = list(args) self._kwargs = kwargs random_state = self._kwargs.pop('random_state', None) self.random_state = random_state if random_state is not None else RandomState() self._initial_state = self._random_state.get_state() + + self._check_data() if args: self._num_items = len(args[0]) elif kwargs: key = list(kwargs.keys())[0] self._num_items = len(kwargs[key]) - all_args = list(args) all_args.extend([v for v in kwargs.values()]) if self._common_size_required: @@ -209,13 +212,12 @@ def __init__(self, *args, **kwargs): self._seed = None self.pos_data = args self.kw_data = kwargs - self.data = (args, kwargs) + self.data = (self.pos_data, 
self.kw_data) self._base = None self._results = None self._studentized_results = None self._last_func = None - self._name = 'IID Bootstrap' for key, value in kwargs.items(): attr = getattr(self, key, None) if attr is None: @@ -560,6 +562,16 @@ def conf_int(self, func, reps=1000, method='basic', size=0.95, tail='two', return np.vstack((lower, upper)) + def _check_data(self): + supported = (np.ndarray, pd.DataFrame, pd.Series) + for i, arg in enumerate(self._args): + if not isinstance(arg, supported): + raise TypeError(arg_type_error.format(i=i, arg_type=type(arg))) + for key in self._kwargs: + if not isinstance(self._kwargs[key], supported): + arg_type = type(self._kwargs[key]) + raise TypeError(kwarg_type_error.format(key=key, arg_type=arg_type)) + def _bca_bias(self): p = (self._results < self._base).mean(axis=0) b = stats.norm.ppf(p) @@ -959,10 +971,11 @@ class IndependentSamplesBootstrap(IIDBootstrap): """ _common_size_required = False + _name = 'Heterogeneous IID Bootstrap' def __init__(self, *args, **kwargs): super(IndependentSamplesBootstrap, self).__init__(*args, **kwargs) - self._name = 'Heterogeneous IID Bootstrap' + self._num_args = len(args) self._num_arg_items = [len(arg) for arg in args] self._num_kw_items = {key: len(kwargs[key]) for key in self._kwargs} @@ -1102,12 +1115,12 @@ class CircularBlockBootstrap(IIDBootstrap): >>> rs = RandomState(1234) >>> bs = CircularBlockBootstrap(17, x, y=y, z=z, random_state=rs) """ + _name = 'Circular Block Bootstrap' def __init__(self, block_size, *args, **kwargs): super(CircularBlockBootstrap, self).__init__(*args, **kwargs) self.block_size = block_size self._parameters = [block_size] - self._name = 'Circular Block Bootstrap' def __str__(self): txt = self._name @@ -1205,12 +1218,11 @@ class StationaryBootstrap(CircularBlockBootstrap): >>> rs = RandomState(1234) >>> bs = StationaryBootstrap(12, x, y=y, z=z, random_state=rs) """ + _name = 'Stationary Bootstrap' def __init__(self, block_size, *args, **kwargs): 
super(StationaryBootstrap, self).__init__(block_size, *args, **kwargs) - self._name = 'Stationary Bootstrap' self._p = 1.0 / block_size - self._name = 'Stationary Bootstrap' def update_indices(self): indices = self.random_state.randint(self._num_items, @@ -1284,10 +1296,10 @@ class MovingBlockBootstrap(CircularBlockBootstrap): >>> rs = RandomState(1234) >>> bs = MovingBlockBootstrap(7, x, y=y, z=z, random_state=rs) """ + _name = 'Moving Block Bootstrap' def __init__(self, block_size, *args, **kwargs): super(MovingBlockBootstrap, self).__init__(block_size, *args, **kwargs) - self._name = 'Moving Block Bootstrap' def update_indices(self): num_blocks = self._num_items // self.block_size diff --git a/arch/tests/bootstrap/test_bootstrap.py b/arch/tests/bootstrap/test_bootstrap.py index 783655b479..6ade51dfcb 100644 --- a/arch/tests/bootstrap/test_bootstrap.py +++ b/arch/tests/bootstrap/test_bootstrap.py @@ -851,3 +851,13 @@ def f(x): bs = IIDBootstrap(x) with pytest.raises(StudentizationError): bs.conf_int(f, 100, method='studentized') + + +def test_list_input(): + # GH 315 + with pytest.raises(TypeError, match="Positional input 0 "): + vals = np.random.standard_normal(25).tolist() + IIDBootstrap(vals) + with pytest.raises(TypeError, match="Input `data` "): + vals = np.random.standard_normal(25).tolist() + IIDBootstrap(data=vals) diff --git a/arch/utility/exceptions.py b/arch/utility/exceptions.py index 3e52ae88f3..193837d306 100644 --- a/arch/utility/exceptions.py +++ b/arch/utility/exceptions.py @@ -58,6 +58,17 @@ class DataScaleWarning(Warning): """ +arg_type_error = """\ +Only NumPy arrays and pandas DataFrames and Series are supported in positional +arguments. Positional input {i} has type {arg_type}. +""" + +kwarg_type_error = """\ +Only NumPy arrays and pandas DataFrames and Series are supported in keyword +arguments. Input `{key}` has type {arg_type}. 
+""" + + class StudentizationError(RuntimeError): pass diff --git a/doc/source/changes/4.0.txt b/doc/source/changes/4.0.txt index 3419dc7b22..3a4d16e22d 100644 --- a/doc/source/changes/4.0.txt +++ b/doc/source/changes/4.0.txt @@ -4,6 +4,7 @@ Version 4 Since 4.10 ========== +- Raise ``TypeError`` if bootstrap inputs are not ndarrays, DataFrames or Series (:issue:`315`). - Added a check that the covariance is non-zero when using "studentized" confidence intervals. If the function bootstrapped produces statistics with 0 variance, it is not possible to studentized (:issue:`322`).