From ba9dc57df358b4a9b09370347a9c51dd8c8fcab1 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 30 May 2021 09:12:44 -0700 Subject: [PATCH 1/2] DEPR: silent overflow on Series construction --- pandas/core/dtypes/cast.py | 16 +++++++++++++++- pandas/tests/frame/test_stack_unstack.py | 2 +- pandas/tests/series/test_constructors.py | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 40883dd8f747b..5f9f8a57bbaad 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2020,7 +2020,7 @@ def construct_1d_ndarray_preserving_na( def maybe_cast_to_integer_array( arr: list | np.ndarray, dtype: np.dtype, copy: bool = False -): +) -> np.ndarray: """ Takes any dtype and returns the casted version, raising for when data is incompatible with integer/unsigned integer dtypes. @@ -2091,6 +2091,20 @@ def maybe_cast_to_integer_array( if is_float_dtype(arr.dtype) or is_object_dtype(arr.dtype): raise ValueError("Trying to coerce float values to integers") + if casted.dtype < arr.dtype: + # e.g. [1, 200, 923442] and dtype="int8" -> overflows + warnings.warn( + f"Values are too large to be losslessly cast to {dtype}. " + "In a future version this will raise OverflowError. To retain the " + f"old behavior, use pd.Series(values).astype({dtype})", + FutureWarning, + stacklevel=find_stack_level(), + ) + return casted + + # No known cases that get here, but raising explicitly to cover our bases. + raise ValueError(f"values cannot be losslessly cast to {dtype}") + def convert_scalar_for_putitemlike(scalar: Scalar, dtype: np.dtype) -> Scalar: """ diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 4a7c4faade00d..b617514f383af 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -358,7 +358,7 @@ def test_unstack_preserve_dtypes(self): "E": Series([1.0, 50.0, 100.0]).astype("float32"), "F": Series([3.0, 4.0, 5.0]).astype("float64"), "G": False, - "H": Series([1, 200, 923442], dtype="int8"), + "H": Series([1, 200, 923442]).astype("int8"), } ) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index af730bf299336..4ce1af0af1eb0 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -711,6 +711,21 @@ def test_constructor_cast(self): with pytest.raises(ValueError, match=msg): Series(["a", "b", "c"], dtype=float) + def test_constructor_signed_int_overflow_deprecation(self): + # GH#40144 disallow silent overflow + msg = "Values are too large to be losslessly cast" + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([1, 200, 923442], dtype="int8") + + expected = Series([1, -56, 50], dtype="int8") + tm.assert_series_equal(ser, expected) + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series([1, 200, 923442], dtype="uint8") + + expected = Series([1, 200, 50], dtype="uint8") + tm.assert_series_equal(ser, expected) + def test_constructor_unsigned_dtype_overflow(self, uint_dtype): # see gh-15832 msg = "Trying to coerce negative values to unsigned integers" From 73d4bb1d288fd4ba3b74ed41d554a65a57e6c7c5 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 30 May 2021 09:18:23 -0700 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/dtypes/cast.py | 2 +- pandas/tests/series/test_constructors.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index ea9017da8a2f9..42303b511d0a5 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -696,6 +696,7 @@ Deprecations - Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`) - Deprecated passing arguments as positional in :meth:`DataFrame.reset_index` (other than ``"level"``) and :meth:`Series.reset_index` (:issue:`41485`) - Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`,:issue:`33401`) +- Deprecated behavior of :class:`Series` construction with large-integer values and small-integer dtype silently overflowing; use ``Series(data).astype(dtype)`` instead (:issue:`41734`) - Deprecated passing arguments as positional in :meth:`DataFrame.set_axis` and :meth:`Series.set_axis` (other than ``"labels"``) (:issue:`41485`) - Deprecated passing arguments as positional in :meth:`DataFrame.where` and :meth:`Series.where` (other than ``"cond"`` and ``"other"``) (:issue:`41485`) - Deprecated passing arguments as positional (other than ``filepath_or_buffer``) in :func:`read_csv` (:issue:`41485`) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5f9f8a57bbaad..b3fe1aecde930 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2092,7 +2092,7 @@ def maybe_cast_to_integer_array( raise ValueError("Trying to coerce float values to integers") if casted.dtype < arr.dtype: - # e.g. [1, 200, 923442] and dtype="int8" -> overflows + # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows warnings.warn( f"Values are too large to be losslessly cast to {dtype}. " "In a future version this will raise OverflowError. To retain the " diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 4ce1af0af1eb0..8c2c4b895490a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -712,7 +712,7 @@ def test_constructor_cast(self): Series(["a", "b", "c"], dtype=float) def test_constructor_signed_int_overflow_deprecation(self): - # GH#40144 disallow silent overflow + # GH#41734 disallow silent overflow msg = "Values are too large to be losslessly cast" with tm.assert_produces_warning(FutureWarning, match=msg): ser = Series([1, 200, 923442], dtype="int8")