Skip to content

Commit

Permalink
BUG: Fix exceptions when Series.interpolate's order parameter is mi…
Browse files Browse the repository at this point in the history
…ssing or invalid (pandas-dev#25246)

* BUG: raise accurate exception from Series.interpolate (pandas-dev#24014)

* Actually validate `order` before use in spline

* Remove unnecessary check and dead code

* Clean up comparison/tests based on feedback

* Include invalid order value in exception

* Check for NaN order in spline validation

* Add whatsnew entry for bug fix

* CLN: Make unit tests assert one error at a time

* CLN: break test into distinct test case

* PEP8 fix in test module

* CLN: Test fixture for interpolate methods
  • Loading branch information
nmusolino authored and Pingviinituutti committed Feb 28, 2019
1 parent 34b0988 commit 4b8ce58
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 58 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ Indexing
Missing
^^^^^^^

-
- Fixed misleading exception message in :meth:`Series.missing` if argument ``order`` is required, but omitted (:issue:`10633`, :issue:`24014`).
-
-

Expand Down
30 changes: 12 additions & 18 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1115,24 +1115,18 @@ def check_int_bool(self, inplace):
fill_value=fill_value,
coerce=coerce,
downcast=downcast)
# try an interp method
try:
m = missing.clean_interp_method(method, **kwargs)
except ValueError:
m = None

if m is not None:
r = check_int_bool(self, inplace)
if r is not None:
return r
return self._interpolate(method=m, index=index, values=values,
axis=axis, limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value, inplace=inplace,
downcast=downcast, **kwargs)

raise ValueError("invalid method '{0}' to interpolate.".format(method))
# validate the interp method
m = missing.clean_interp_method(method, **kwargs)

r = check_int_bool(self, inplace)
if r is not None:
return r
return self._interpolate(method=m, index=index, values=values,
axis=axis, limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value, inplace=inplace,
downcast=downcast, **kwargs)

def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
limit=None, fill_value=None, coerce=False,
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,10 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None,
bounds_error=bounds_error)
new_y = terp(new_x)
elif method == 'spline':
# GH #10633
if not order:
raise ValueError("order needs to be specified and greater than 0")
# GH #10633, #24014
if isna(order) or (order <= 0):
raise ValueError("order needs to be specified and greater than 0; "
"got order: {}".format(order))
terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
new_y = terp(new_x)
else:
Expand Down
97 changes: 61 additions & 36 deletions pandas/tests/series/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,8 +854,23 @@ def test_series_pad_backfill_limit(self):
assert_series_equal(result, expected)


class TestSeriesInterpolateData():
@pytest.fixture(params=['linear', 'index', 'values', 'nearest', 'slinear',
'zero', 'quadratic', 'cubic', 'barycentric', 'krogh',
'polynomial', 'spline', 'piecewise_polynomial',
'from_derivatives', 'pchip', 'akima', ])
def nontemporal_method(request):
""" Fixture that returns an (method name, required kwargs) pair.
This fixture does not include method 'time' as a parameterization; that
method requires a Series with a DatetimeIndex, and is generally tested
separately from these non-temporal methods.
"""
method = request.param
kwargs = dict(order=1) if method in ('spline', 'polynomial') else dict()
return method, kwargs


class TestSeriesInterpolateData():
def test_interpolate(self, datetime_series, string_series):
ts = Series(np.arange(len(datetime_series), dtype=float),
datetime_series.index)
Expand All @@ -875,12 +890,12 @@ def test_interpolate(self, datetime_series, string_series):
time_interp = ord_ts_copy.interpolate(method='time')
tm.assert_series_equal(time_interp, ord_ts)

# try time interpolation on a non-TimeSeries
# Only raises ValueError if there are NaNs.
non_ts = string_series.copy()
non_ts[0] = np.NaN
msg = ("time-weighted interpolation only works on Series or DataFrames"
" with a DatetimeIndex")
def test_interpolate_time_raises_for_non_timeseries(self):
# When method='time' is used on a non-TimeSeries that contains a null
# value, a ValueError should be raised.
non_ts = Series([0, 1, 2, np.NaN])
msg = ("time-weighted interpolation only works on Series.* "
"with a DatetimeIndex")
with pytest.raises(ValueError, match=msg):
non_ts.interpolate(method='time')

Expand Down Expand Up @@ -1061,21 +1076,35 @@ def test_interp_limit(self):
result = s.interpolate(method='linear', limit=2)
assert_series_equal(result, expected)

# GH 9217, make sure limit is an int and greater than 0
methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero',
'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
'polynomial', 'spline', 'piecewise_polynomial', None,
'from_derivatives', 'pchip', 'akima']
s = pd.Series([1, 2, np.nan, np.nan, 5])
msg = (r"Limit must be greater than 0|"
"time-weighted interpolation only works on Series or"
r" DataFrames with a DatetimeIndex|"
r"invalid method '(polynomial|spline|None)' to interpolate|"
"Limit must be an integer")
for limit in [-1, 0, 1., 2.]:
for method in methods:
with pytest.raises(ValueError, match=msg):
s.interpolate(limit=limit, method=method)
@pytest.mark.parametrize("limit", [-1, 0])
def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method,
limit):
# GH 9217: make sure limit is greater than zero.
s = pd.Series([1, 2, np.nan, 4])
method, kwargs = nontemporal_method
with pytest.raises(ValueError, match="Limit must be greater than 0"):
s.interpolate(limit=limit, method=method, **kwargs)

def test_interpolate_invalid_float_limit(self, nontemporal_method):
# GH 9217: make sure limit is an integer.
s = pd.Series([1, 2, np.nan, 4])
method, kwargs = nontemporal_method
limit = 2.0
with pytest.raises(ValueError, match="Limit must be an integer"):
s.interpolate(limit=limit, method=method, **kwargs)

@pytest.mark.parametrize("invalid_method", [None, 'nonexistent_method'])
def test_interp_invalid_method(self, invalid_method):
s = Series([1, 3, np.nan, 12, np.nan, 25])

msg = "method must be one of.* Got '{}' instead".format(invalid_method)
with pytest.raises(ValueError, match=msg):
s.interpolate(method=invalid_method)

# When an invalid method and invalid limit (such as -1) are
# provided, the error message reflects the invalid method.
with pytest.raises(ValueError, match=msg):
s.interpolate(method=invalid_method, limit=-1)

def test_interp_limit_forward(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
Expand Down Expand Up @@ -1276,11 +1305,20 @@ def test_interp_limit_no_nans(self):
@td.skip_if_no_scipy
@pytest.mark.parametrize("method", ['polynomial', 'spline'])
def test_no_order(self, method):
# see GH-10633, GH-24014
s = Series([0, 1, np.nan, 3])
msg = "invalid method '{}' to interpolate".format(method)
msg = "You must specify the order of the spline or polynomial"
with pytest.raises(ValueError, match=msg):
s.interpolate(method=method)

@td.skip_if_no_scipy
@pytest.mark.parametrize('order', [-1, -1.0, 0, 0.0, np.nan])
def test_interpolate_spline_invalid_order(self, order):
s = Series([0, 1, np.nan, 3])
msg = "order needs to be specified and greater than 0"
with pytest.raises(ValueError, match=msg):
s.interpolate(method='spline', order=order)

@td.skip_if_no_scipy
def test_spline(self):
s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
Expand Down Expand Up @@ -1313,19 +1351,6 @@ def test_spline_interpolation(self):
expected1 = s.interpolate(method='spline', order=1)
assert_series_equal(result1, expected1)

@td.skip_if_no_scipy
def test_spline_error(self):
# see gh-10633
s = pd.Series(np.arange(10) ** 2)
s[np.random.randint(0, 9, 3)] = np.nan
msg = "invalid method 'spline' to interpolate"
with pytest.raises(ValueError, match=msg):
s.interpolate(method='spline')

msg = "order needs to be specified and greater than 0"
with pytest.raises(ValueError, match=msg):
s.interpolate(method='spline', order=0)

def test_interp_timedelta64(self):
# GH 6424
df = Series([1, np.nan, 3],
Expand Down

0 comments on commit 4b8ce58

Please sign in to comment.