From b424439e8b03f3ea4243ef4d849e5c5bace8888a Mon Sep 17 00:00:00 2001 From: joncrall Date: Mon, 17 Jul 2017 12:46:37 -0400 Subject: [PATCH 1/7] FIX: np.inf now upcasts int Index to float --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/base.py | 6 ++-- pandas/tests/indexing/test_indexing.py | 41 ++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 935e9d740b91c..4ff973046f932 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -145,6 +145,7 @@ Bug Fixes ~~~~~~~~~ - Fixes regression in 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) +- Fixes inf upcast from integer indices in 0.20, previously :func:`Int64Index.__contains__` and `DataFrame.loc.__setitem__` raised an `OverflowError` when given `np.inf` (:issue:`16957`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bbbc19b36964d..5d50f961927c7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -666,7 +666,7 @@ def _try_convert_to_int_index(cls, data, copy, name): res = data.astype('u8', copy=False) if (res == data).all(): return UInt64Index(res, copy=copy, name=name) - except (TypeError, ValueError): + except (OverflowError, TypeError, ValueError): pass raise ValueError @@ -1640,7 +1640,7 @@ def __contains__(self, key): hash(key) try: return key in self._engine - except (TypeError, ValueError): + except (OverflowError, TypeError, ValueError): return False _index_shared_docs['contains'] = """ @@ -3365,7 +3365,7 @@ def _maybe_cast_indexer(self, key): ckey = int(key) if ckey == key: key = ckey - except (ValueError, TypeError): + except (OverflowError, ValueError, TypeError): pass return key diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 9fa677eb624ae..33d0247a64b62 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -63,6 +63,35 @@ def f(): pytest.raises(ValueError, f) + def test_inf_upcast(self): + # GH 16957 + # We should be able to use np.inf as a key + # np.inf should cause an index to convert to float + + # Test with np.inf in rows + df = pd.DataFrame(columns=[0]) + df.loc[1] = 1 + df.loc[2] = 2 + df.loc[np.inf] = 3 + + # make sure we can look up the value + result = df.loc[np.inf, 0] + tm.assert_almost_equal(result, 3) + + result = df.index + expected = pd.Float64Index([1, 2, np.inf]) + tm.assert_index_equal(result, expected) + + # Test with np.inf in columns + df = pd.DataFrame() + df.loc[0, 0] = 1 + df.loc[1, 1] = 2 + df.loc[0, np.inf] = 3 + + result = df.columns + expected = pd.Float64Index([0, 1, np.inf]) + tm.assert_index_equal(result, expected) + def test_setitem_dtype_upcast(self): # GH3216 @@ -542,6 +571,18 @@ def test_astype_assignment_with_dups(self): # result = df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() + def test_coercion_with_contains(self): + # Related to GH 16957 + # Checking if Int64Index contains np.inf should catch the OverflowError + for val in [np.inf, np.nan]: + index = pd.Int64Index([1, 2, 3]) + result = val in index + tm.assert_almost_equal(result, False) + + index = pd.UInt64Index([1, 2, 3]) + result = np.inf in index + tm.assert_almost_equal(result, False) + def test_index_type_coercion(self): with catch_warnings(record=True): From f040f4e76bd524c69e97b066b48ec7472a1e3756 Mon Sep 17 00:00:00 2001 From: joncrall Date: Mon, 17 Jul 2017 16:58:16 -0400 Subject: [PATCH 2/7] changes based on code review --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/tests/indexing/test_indexing.py | 27 +++++++++++++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4ff973046f932..9deb10fcb40d7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -145,7 +145,7 @@ Bug Fixes ~~~~~~~~~ - Fixes regression in 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) -- Fixes inf upcast from integer indices in 0.20, previously :func:`Int64Index.__contains__` and `DataFrame.loc.__setitem__` raised an `OverflowError` when given `np.inf` (:issue:`16957`) +- Fixes bug where indexing with `np.inf` caused an `OverflowError` to be raised (:issue:`16957`) Conversion ^^^^^^^^^^ diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 33d0247a64b62..be5aaac80e44d 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -75,8 +75,7 @@ def test_inf_upcast(self): df.loc[np.inf] = 3 # make sure we can look up the value - result = df.loc[np.inf, 0] - tm.assert_almost_equal(result, 3) + assert df.loc[np.inf, 0] == 3 result = df.index expected = pd.Float64Index([1, 2, np.inf]) @@ -571,17 +570,23 @@ def test_astype_assignment_with_dups(self): # result = df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() - def test_coercion_with_contains(self): + @pytest.mark.parametrize("val,expected", [ + (np.inf, False), + (np.nan, False), + (3.0, True), + (3.5, False), + (0.0, True), + ]) + def test_coercion_with_contains(self, val, expected): # Related to GH 16957 # Checking if Int64Index contains np.inf should catch the OverflowError - for val in [np.inf, np.nan]: - index = pd.Int64Index([1, 2, 3]) - result = val in index - tm.assert_almost_equal(result, False) - - index = pd.UInt64Index([1, 2, 3]) - result = np.inf in index - tm.assert_almost_equal(result, False) + index = pd.Int64Index([1, 2, 3]) + result = val in index + assert result is expected + + index = pd.UInt64Index([1, 2, 3]) + result = val in index + assert result is expected def test_index_type_coercion(self): From 6f3886f7f37afdb3d7e839c23b36bcbdbe9add59 Mon Sep 17 00:00:00 2001 From: joncrall Date: Mon, 17 Jul 2017 17:23:45 -0400 Subject: [PATCH 3/7] code review change --- pandas/tests/indexing/test_indexing.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index be5aaac80e44d..dbb5c682f91b7 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -571,20 +571,19 @@ def test_astype_assignment_with_dups(self): # expected = Series({'float64': 2, 'object': 1}).sort_index() @pytest.mark.parametrize("val,expected", [ - (np.inf, False), - (np.nan, False), - (3.0, True), - (3.5, False), - (0.0, True), + # Checking if np.inf in Int64Index should not cause an OverflowError + (pd.Int64Index([0, 1, 2]), np.inf, False), + (pd.Int64Index([0, 1, 2]), np.nan, False), + (pd.UInt64Index([0, 1, 2]), np.inf, False), + (pd.UInt64Index([0, 1, 2]), np.nan, False), + # Check contains correctly checks for np.inf + (pd.Index([0, 1, 2, np.inf]), np.inf, True), + (pd.Index([0, 1, 2, np.nan]), np.nan, True), + (pd.Index([0, 1, 2, np.inf]), np.nan, False), + (pd.Index([0, 1, 2, np.nan]), np.inf, False), ]) - def test_coercion_with_contains(self, val, expected): + def test_coercion_with_contains(self, index, val, expected): # Related to GH 16957 - # Checking if Int64Index contains np.inf should catch the OverflowError - index = pd.Int64Index([1, 2, 3]) - result = val in index - assert result is expected - - index = pd.UInt64Index([1, 2, 3]) result = val in index assert result is expected From e029708f13b042a39839204b398ea280a0663768 Mon Sep 17 00:00:00 2001 From: joncrall Date: Mon, 17 Jul 2017 17:24:04 -0400 Subject: [PATCH 4/7] quick fix --- pandas/tests/indexing/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index dbb5c682f91b7..f68298d1c78bc 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -570,7 +570,7 @@ def test_astype_assignment_with_dups(self): # result = df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() - @pytest.mark.parametrize("val,expected", [ + @pytest.mark.parametrize("index,val,expected", [ # Checking if np.inf in Int64Index should not cause an OverflowError (pd.Int64Index([0, 1, 2]), np.inf, False), (pd.Int64Index([0, 1, 2]), np.nan, False), From 61d82f6fc657d41ba4b062d0381f845bffac1d04 Mon Sep 17 00:00:00 2001 From: joncrall Date: Mon, 17 Jul 2017 18:12:37 -0400 Subject: [PATCH 5/7] Testing Index contains and not_contains --- pandas/tests/indexing/test_indexing.py | 39 +++++++++++++++++--------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f68298d1c78bc..c86df70d27089 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -570,22 +570,33 @@ def test_astype_assignment_with_dups(self): # result = df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() - @pytest.mark.parametrize("index,val,expected", [ + @pytest.mark.parametrize("index,val", [ + (pd.Index([0, 1, 2]), 2), + (pd.Index([0, 1, '2']), '2'), + (pd.Index([0, 1, 2, np.inf]), 4), + (pd.Index([0, 1, 2, np.nan]), 4), + (pd.Index([0, 1, 2, np.inf]), np.nan), + (pd.Index([0, 1, 2, np.nan]), np.inf), # Checking if np.inf in Int64Index should not cause an OverflowError - (pd.Int64Index([0, 1, 2]), np.inf, False), - (pd.Int64Index([0, 1, 2]), np.nan, False), - (pd.UInt64Index([0, 1, 2]), np.inf, False), - (pd.UInt64Index([0, 1, 2]), np.nan, False), - # Check contains correctly checks for np.inf - (pd.Index([0, 1, 2, np.inf]), np.inf, True), - (pd.Index([0, 1, 2, np.nan]), np.nan, True), - (pd.Index([0, 1, 2, np.inf]), np.nan, False), - (pd.Index([0, 1, 2, np.nan]), np.inf, False), - ]) - def test_coercion_with_contains(self, index, val, expected): # Related to GH 16957 - result = val in index - assert result is expected + (pd.Int64Index([0, 1, 2]), np.inf), + (pd.Int64Index([0, 1, 2]), np.nan), + (pd.UInt64Index([0, 1, 2]), np.inf), + (pd.UInt64Index([0, 1, 2]), np.nan), + ]) + def test_index_contains(self, index, val): + assert val in index + + @pytest.mark.parametrize("index,val", [ + (pd.Index([0, 1, 2]), '2'), + (pd.Index([0, 1, '2']), 2), + (pd.Index([0, 1, 2, np.inf, 4]), 4), + (pd.Index([0, 1, 2, np.nan, 4]), 4), + (pd.Index([0, 1, 2, np.inf]), np.inf), + (pd.Index([0, 1, 2, np.nan]), np.nan), + ]) + def test_index_not_contains(self, index, val): + assert val not in index def test_index_type_coercion(self): From 5de878f127fcc60dd5d8a930277a89028fd8fb39 Mon Sep 17 00:00:00 2001 From: joncrall Date: Mon, 17 Jul 2017 19:18:07 -0400 Subject: [PATCH 6/7] fixed incorrect example placements --- pandas/tests/indexing/test_indexing.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index c86df70d27089..98f5d5eb140df 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -573,6 +573,17 @@ def test_astype_assignment_with_dups(self): @pytest.mark.parametrize("index,val", [ (pd.Index([0, 1, 2]), 2), (pd.Index([0, 1, '2']), '2'), + (pd.Index([0, 1, 2, np.inf, 4]), 4), + (pd.Index([0, 1, 2, np.nan, 4]), 4), + (pd.Index([0, 1, 2, np.inf]), np.inf), + (pd.Index([0, 1, 2, np.nan]), np.nan), + ]) + def test_index_contains(self, index, val): + assert val in index + + @pytest.mark.parametrize("index,val", [ + (pd.Index([0, 1, 2]), '2'), + (pd.Index([0, 1, '2']), 2), (pd.Index([0, 1, 2, np.inf]), 4), (pd.Index([0, 1, 2, np.nan]), 4), (pd.Index([0, 1, 2, np.inf]), np.nan), @@ -584,17 +595,6 @@ def test_astype_assignment_with_dups(self): (pd.UInt64Index([0, 1, 2]), np.inf), (pd.UInt64Index([0, 1, 2]), np.nan), ]) - def test_index_contains(self, index, val): - assert val in index - - @pytest.mark.parametrize("index,val", [ - (pd.Index([0, 1, 2]), '2'), - (pd.Index([0, 1, '2']), 2), - (pd.Index([0, 1, 2, np.inf, 4]), 4), - (pd.Index([0, 1, 2, np.nan, 4]), 4), - (pd.Index([0, 1, 2, np.inf]), np.inf), - (pd.Index([0, 1, 2, np.nan]), np.nan), - ]) def test_index_not_contains(self, index, val): assert val not in index From 7310c36c382da4c9829e8f0af1226a431fefaa1e Mon Sep 17 00:00:00 2001 From: joncrall Date: Tue, 18 Jul 2017 10:31:25 -0400 Subject: [PATCH 7/7] minor doc fix --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9deb10fcb40d7..b8c4cf61edc5b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -145,7 +145,7 @@ Bug Fixes ~~~~~~~~~ - Fixes regression in 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) -- Fixes bug where indexing with `np.inf` caused an `OverflowError` to be raised (:issue:`16957`) +- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) Conversion ^^^^^^^^^^