From 6a5e56dc9402136e74e8c818a6947fd495bcd3b2 Mon Sep 17 00:00:00 2001
From: Jon Crall
Date: Tue, 18 Jul 2017 11:58:55 -0400
Subject: [PATCH] BUG: np.inf now causes Index to upcast from int to float (#16996)

Closes gh-16957.
---
 doc/source/whatsnew/v0.21.0.txt        |  1 +
 pandas/core/indexes/base.py            |  6 +--
 pandas/tests/indexing/test_indexing.py | 56 ++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index cba3691b25ab1..2259eb7d89534 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -179,6 +179,7 @@ Bug Fixes
 ~~~~~~~~~
 
 - Fixes regression in 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
+- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`)
 
 Conversion
 ^^^^^^^^^^
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index bbbc19b36964d..5d50f961927c7 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -666,7 +666,7 @@ def _try_convert_to_int_index(cls, data, copy, name):
             res = data.astype('u8', copy=False)
             if (res == data).all():
                 return UInt64Index(res, copy=copy, name=name)
-        except (TypeError, ValueError):
+        except (OverflowError, TypeError, ValueError):
             pass
 
         raise ValueError
@@ -1640,7 +1640,7 @@ def __contains__(self, key):
         hash(key)
         try:
             return key in self._engine
-        except (TypeError, ValueError):
+        except (OverflowError, TypeError, ValueError):
             return False
 
     _index_shared_docs['contains'] = """
@@ -3365,7 +3365,7 @@ def _maybe_cast_indexer(self, key):
                 ckey = int(key)
                 if ckey == key:
                     key = ckey
-            except (ValueError, TypeError):
+            except (OverflowError, ValueError, TypeError):
                 pass
         return key
 
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index 9fa677eb624ae..98f5d5eb140df 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -63,6 +63,34 @@ def f():
 
         pytest.raises(ValueError, f)
 
+    def test_inf_upcast(self):
+        # GH 16957
+        # We should be able to use np.inf as a key
+        # np.inf should cause an index to convert to float
+
+        # Test with np.inf in rows
+        df = pd.DataFrame(columns=[0])
+        df.loc[1] = 1
+        df.loc[2] = 2
+        df.loc[np.inf] = 3
+
+        # make sure we can look up the value
+        assert df.loc[np.inf, 0] == 3
+
+        result = df.index
+        expected = pd.Float64Index([1, 2, np.inf])
+        tm.assert_index_equal(result, expected)
+
+        # Test with np.inf in columns
+        df = pd.DataFrame()
+        df.loc[0, 0] = 1
+        df.loc[1, 1] = 2
+        df.loc[0, np.inf] = 3
+
+        result = df.columns
+        expected = pd.Float64Index([0, 1, np.inf])
+        tm.assert_index_equal(result, expected)
+
     def test_setitem_dtype_upcast(self):
 
         # GH3216
@@ -542,6 +570,34 @@ def test_astype_assignment_with_dups(self):
         # result = df.get_dtype_counts().sort_index()
         # expected = Series({'float64': 2, 'object': 1}).sort_index()
 
+    @pytest.mark.parametrize("index,val", [
+        (pd.Index([0, 1, 2]), 2),
+        (pd.Index([0, 1, '2']), '2'),
+        (pd.Index([0, 1, 2, np.inf, 4]), 4),
+        (pd.Index([0, 1, 2, np.nan, 4]), 4),
+        (pd.Index([0, 1, 2, np.inf]), np.inf),
+        (pd.Index([0, 1, 2, np.nan]), np.nan),
+    ])
+    def test_index_contains(self, index, val):
+        assert val in index
+
+    @pytest.mark.parametrize("index,val", [
+        (pd.Index([0, 1, 2]), '2'),
+        (pd.Index([0, 1, '2']), 2),
+        (pd.Index([0, 1, 2, np.inf]), 4),
+        (pd.Index([0, 1, 2, np.nan]), 4),
+        (pd.Index([0, 1, 2, np.inf]), np.nan),
+        (pd.Index([0, 1, 2, np.nan]), np.inf),
+        # Checking if np.inf in Int64Index should not cause an OverflowError
+        # Related to GH 16957
+        (pd.Int64Index([0, 1, 2]), np.inf),
+        (pd.Int64Index([0, 1, 2]), np.nan),
+        (pd.UInt64Index([0, 1, 2]), np.inf),
+        (pd.UInt64Index([0, 1, 2]), np.nan),
+    ])
+    def test_index_not_contains(self, index, val):
+        assert val not in index
+
     def test_index_type_coercion(self):
 
         with catch_warnings(record=True):