Skip to content

Commit

Permalink
DEPR: silent overflow on Series construction (#41734)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Jun 1, 2021
1 parent b9e96fa commit db6e71b
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,7 @@ Deprecations
- Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`)
- Deprecated passing arguments as positional in :meth:`DataFrame.reset_index` (other than ``"level"``) and :meth:`Series.reset_index` (:issue:`41485`)
- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`)
- Deprecated silent overflow when constructing a :class:`Series` from integer values that are too large for the requested integer dtype; in a future version this will raise. To retain the old behavior, use ``Series(data).astype(dtype)`` instead (:issue:`41734`)
- Deprecated inference of ``timedelta64[ns]``, ``datetime64[ns]``, or ``DatetimeTZDtype`` dtypes in :class:`Series` construction when data containing strings is passed and no ``dtype`` is passed (:issue:`33558`)
- In a future version, constructing :class:`Series` or :class:`DataFrame` with ``datetime64[ns]`` data and ``DatetimeTZDtype`` will treat the data as wall-times instead of as UTC times (matching DatetimeIndex behavior). To treat the data as UTC times, use ``pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(dtype.tz)`` or ``pd.Series(data.view("int64"), dtype=dtype)`` (:issue:`33401`)
- Deprecated passing arguments as positional in :meth:`DataFrame.set_axis` and :meth:`Series.set_axis` (other than ``"labels"``) (:issue:`41485`)
Expand Down
16 changes: 15 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -2036,7 +2036,7 @@ def construct_1d_ndarray_preserving_na(

def maybe_cast_to_integer_array(
arr: list | np.ndarray, dtype: np.dtype, copy: bool = False
):
) -> np.ndarray:
"""
Takes any dtype and returns the casted version, raising for when data is
incompatible with integer/unsigned integer dtypes.
Expand Down Expand Up @@ -2107,6 +2107,20 @@ def maybe_cast_to_integer_array(
if is_float_dtype(arr.dtype) or is_object_dtype(arr.dtype):
raise ValueError("Trying to coerce float values to integers")

if casted.dtype < arr.dtype:
# GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
warnings.warn(
f"Values are too large to be losslessly cast to {dtype}. "
"In a future version this will raise OverflowError. To retain the "
f"old behavior, use pd.Series(values).astype({dtype})",
FutureWarning,
stacklevel=find_stack_level(),
)
return casted

# No known cases that get here, but raising explicitly to cover our bases.
raise ValueError(f"values cannot be losslessly cast to {dtype}")


def convert_scalar_for_putitemlike(scalar: Scalar, dtype: np.dtype) -> Scalar:
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def test_unstack_preserve_dtypes(self):
"E": Series([1.0, 50.0, 100.0]).astype("float32"),
"F": Series([3.0, 4.0, 5.0]).astype("float64"),
"G": False,
"H": Series([1, 200, 923442], dtype="int8"),
"H": Series([1, 200, 923442]).astype("int8"),
}
)

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,21 @@ def test_constructor_cast(self):
with pytest.raises(ValueError, match=msg):
Series(["a", "b", "c"], dtype=float)

def test_constructor_signed_int_overflow_deprecation(self):
    # GH#41734 constructing with values that do not fit the requested
    # integer dtype emits a FutureWarning; for now the values still wrap.
    msg = "Values are too large to be losslessly cast"
    values = [1, 200, 923442]

    # (requested dtype, values expected after wrap-around)
    cases = (
        ("int8", [1, -56, 50]),
        ("uint8", [1, 200, 50]),
    )
    for dtype, wrapped in cases:
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = Series(values, dtype=dtype)

        tm.assert_series_equal(result, Series(wrapped, dtype=dtype))

def test_constructor_unsigned_dtype_overflow(self, uint_dtype):
# see gh-15832
msg = "Trying to coerce negative values to unsigned integers"
Expand Down

0 comments on commit db6e71b

Please sign in to comment.