From ea1d5f52a329ba24f1b4424f1e8f00cf9c8485c8 Mon Sep 17 00:00:00 2001 From: Nicholas Musolino Date: Mon, 11 Feb 2019 11:08:47 -0500 Subject: [PATCH] BUG: Fix exceptions when Series.interpolate's `order` parameter is missing or invalid (#25246) * BUG: raise accurate exception from Series.interpolate (#24014) * Actually validate `order` before use in spline * Remove unnecessary check and dead code * Clean up comparison/tests based on feedback * Include invalid order value in exception * Check for NaN order in spline validation * Add whatsnew entry for bug fix * CLN: Make unit tests assert one error at a time * CLN: break test into distinct test case * PEP8 fix in test module * CLN: Test fixture for interpolate methods --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/internals/blocks.py | 30 ++++----- pandas/core/missing.py | 7 ++- pandas/tests/series/test_missing.py | 97 ++++++++++++++++++----------- 4 files changed, 78 insertions(+), 58 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index dcb0cbb064f21..11e735028a7d5 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -141,7 +141,7 @@ Indexing Missing ^^^^^^^ -- +- Fixed misleading exception message in :meth:`Series.missing` if argument ``order`` is required, but omitted (:issue:`10633`, :issue:`24014`). - - diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ac7d21de442db..4e2c04dba8b04 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1115,24 +1115,18 @@ def check_int_bool(self, inplace): fill_value=fill_value, coerce=coerce, downcast=downcast) - # try an interp method - try: - m = missing.clean_interp_method(method, **kwargs) - except ValueError: - m = None - - if m is not None: - r = check_int_bool(self, inplace) - if r is not None: - return r - return self._interpolate(method=m, index=index, values=values, - axis=axis, limit=limit, - limit_direction=limit_direction, - limit_area=limit_area, - fill_value=fill_value, inplace=inplace, - downcast=downcast, **kwargs) - - raise ValueError("invalid method '{0}' to interpolate.".format(method)) + # validate the interp method + m = missing.clean_interp_method(method, **kwargs) + + r = check_int_bool(self, inplace) + if r is not None: + return r + return self._interpolate(method=m, index=index, values=values, + axis=axis, limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, inplace=inplace, + downcast=downcast, **kwargs) def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, limit=None, fill_value=None, coerce=False, diff --git a/pandas/core/missing.py b/pandas/core/missing.py index cc7bdf95200d1..9acdb1a06b2d1 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -293,9 +293,10 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, bounds_error=bounds_error) new_y = terp(new_x) elif method == 'spline': - # GH #10633 - if not order: - raise ValueError("order needs to be specified and greater than 0") + # GH #10633, #24014 + if isna(order) or (order <= 0): + raise ValueError("order needs to be specified and greater than 0; " + "got order: {}".format(order)) terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs) new_y = terp(new_x) else: diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 985288c439917..f07dd1dfb5fda 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -854,8 +854,23 @@ def test_series_pad_backfill_limit(self): assert_series_equal(result, expected) -class TestSeriesInterpolateData(): +@pytest.fixture(params=['linear', 'index', 'values', 'nearest', 'slinear', + 'zero', 'quadratic', 'cubic', 'barycentric', 'krogh', + 'polynomial', 'spline', 'piecewise_polynomial', + 'from_derivatives', 'pchip', 'akima', ]) +def nontemporal_method(request): + """ Fixture that returns an (method name, required kwargs) pair. + + This fixture does not include method 'time' as a parameterization; that + method requires a Series with a DatetimeIndex, and is generally tested + separately from these non-temporal methods. + """ + method = request.param + kwargs = dict(order=1) if method in ('spline', 'polynomial') else dict() + return method, kwargs + +class TestSeriesInterpolateData(): def test_interpolate(self, datetime_series, string_series): ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index) @@ -875,12 +890,12 @@ def test_interpolate(self, datetime_series, string_series): time_interp = ord_ts_copy.interpolate(method='time') tm.assert_series_equal(time_interp, ord_ts) - # try time interpolation on a non-TimeSeries - # Only raises ValueError if there are NaNs. - non_ts = string_series.copy() - non_ts[0] = np.NaN - msg = ("time-weighted interpolation only works on Series or DataFrames" - " with a DatetimeIndex") + def test_interpolate_time_raises_for_non_timeseries(self): + # When method='time' is used on a non-TimeSeries that contains a null + # value, a ValueError should be raised. + non_ts = Series([0, 1, 2, np.NaN]) + msg = ("time-weighted interpolation only works on Series.* " + "with a DatetimeIndex") with pytest.raises(ValueError, match=msg): non_ts.interpolate(method='time') @@ -1061,21 +1076,35 @@ def test_interp_limit(self): result = s.interpolate(method='linear', limit=2) assert_series_equal(result, expected) - # GH 9217, make sure limit is an int and greater than 0 - methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero', - 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', - 'polynomial', 'spline', 'piecewise_polynomial', None, - 'from_derivatives', 'pchip', 'akima'] - s = pd.Series([1, 2, np.nan, np.nan, 5]) - msg = (r"Limit must be greater than 0|" - "time-weighted interpolation only works on Series or" - r" DataFrames with a DatetimeIndex|" - r"invalid method '(polynomial|spline|None)' to interpolate|" - "Limit must be an integer") - for limit in [-1, 0, 1., 2.]: - for method in methods: - with pytest.raises(ValueError, match=msg): - s.interpolate(limit=limit, method=method) + @pytest.mark.parametrize("limit", [-1, 0]) + def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, + limit): + # GH 9217: make sure limit is greater than zero. + s = pd.Series([1, 2, np.nan, 4]) + method, kwargs = nontemporal_method + with pytest.raises(ValueError, match="Limit must be greater than 0"): + s.interpolate(limit=limit, method=method, **kwargs) + + def test_interpolate_invalid_float_limit(self, nontemporal_method): + # GH 9217: make sure limit is an integer. + s = pd.Series([1, 2, np.nan, 4]) + method, kwargs = nontemporal_method + limit = 2.0 + with pytest.raises(ValueError, match="Limit must be an integer"): + s.interpolate(limit=limit, method=method, **kwargs) + + @pytest.mark.parametrize("invalid_method", [None, 'nonexistent_method']) + def test_interp_invalid_method(self, invalid_method): + s = Series([1, 3, np.nan, 12, np.nan, 25]) + + msg = "method must be one of.* Got '{}' instead".format(invalid_method) + with pytest.raises(ValueError, match=msg): + s.interpolate(method=invalid_method) + + # When an invalid method and invalid limit (such as -1) are + # provided, the error message reflects the invalid method. + with pytest.raises(ValueError, match=msg): + s.interpolate(method=invalid_method, limit=-1) def test_interp_limit_forward(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) @@ -1276,11 +1305,20 @@ def test_interp_limit_no_nans(self): @td.skip_if_no_scipy @pytest.mark.parametrize("method", ['polynomial', 'spline']) def test_no_order(self, method): + # see GH-10633, GH-24014 s = Series([0, 1, np.nan, 3]) - msg = "invalid method '{}' to interpolate".format(method) + msg = "You must specify the order of the spline or polynomial" with pytest.raises(ValueError, match=msg): s.interpolate(method=method) + @td.skip_if_no_scipy + @pytest.mark.parametrize('order', [-1, -1.0, 0, 0.0, np.nan]) + def test_interpolate_spline_invalid_order(self, order): + s = Series([0, 1, np.nan, 3]) + msg = "order needs to be specified and greater than 0" + with pytest.raises(ValueError, match=msg): + s.interpolate(method='spline', order=order) + @td.skip_if_no_scipy def test_spline(self): s = Series([1, 2, np.nan, 4, 5, np.nan, 7]) @@ -1313,19 +1351,6 @@ def test_spline_interpolation(self): expected1 = s.interpolate(method='spline', order=1) assert_series_equal(result1, expected1) - @td.skip_if_no_scipy - def test_spline_error(self): - # see gh-10633 - s = pd.Series(np.arange(10) ** 2) - s[np.random.randint(0, 9, 3)] = np.nan - msg = "invalid method 'spline' to interpolate" - with pytest.raises(ValueError, match=msg): - s.interpolate(method='spline') - - msg = "order needs to be specified and greater than 0" - with pytest.raises(ValueError, match=msg): - s.interpolate(method='spline', order=0) - def test_interp_timedelta64(self): # GH 6424 df = Series([1, np.nan, 3],