DEPR: Enforce Series(float_with_nan, dtype=inty) (pandas-dev#49605)

* DEPR: Enforce Series(float_with_nan, dtype=inty)
* update asv
* troubleshoot asv
* suggested asv edit
mliu08 · Nov 27, 2022 · f7303b1 · f7303b1
1 parent 3afb93f
commit f7303b1
Show file tree

Hide file tree

Showing 6 changed files with 68 additions and 82 deletions.
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
@@ -600,31 +600,35 @@ def time_frame_agg(self, dtype, method):
 
 
 class Cumulative:
-    param_names = ["dtype", "method"]
+    param_names = ["dtype", "method", "with_nans"]
     params = [
         ["float64", "int64", "Float64", "Int64"],
         ["cummin", "cummax", "cumsum"],
+        [True, False],
     ]
 
-    def setup(self, dtype, method):
+    def setup(self, dtype, method, with_nans):
+        if with_nans and dtype == "int64":
+            raise NotImplementedError("Construction of df would raise")
+
         N = 500_000
-        vals = np.random.randint(-10, 10, (N, 5))
-        null_vals = vals.astype(float, copy=True)
-        null_vals[::2, :] = np.nan
-        null_vals[::3, :] = np.nan
-        df = DataFrame(vals, columns=list("abcde"), dtype=dtype)
-        null_df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
         keys = np.random.randint(0, 100, size=N)
-        df["key"] = keys
-        null_df["key"] = keys
-        self.df = df
-        self.null_df = null_df
+        vals = np.random.randint(-10, 10, (N, 5))
 
-    def time_frame_transform(self, dtype, method):
-        self.df.groupby("key").transform(method)
+        if with_nans:
+            null_vals = vals.astype(float, copy=True)
+            null_vals[::2, :] = np.nan
+            null_vals[::3, :] = np.nan
+            df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
+            df["key"] = keys
+            self.df = df
+        else:
+            df = DataFrame(vals, columns=list("abcde")).astype(dtype, copy=False)
+            df["key"] = keys
+            self.df = df
 
-    def time_frame_transform_many_nulls(self, dtype, method):
-        self.null_df.groupby("key").transform(method)
+    def time_frame_transform(self, dtype, method, with_nans):
+        self.df.groupby("key").transform(method)
 
 
 class RankWithTies:

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -492,6 +492,7 @@ Removal of prior version deprecations/changes
 - Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
 - Changed behavior of setitem-like operations (``__setitem__``, ``fillna``, ``where``, ``mask``, ``replace``, ``insert``, fill_value for ``shift``) on an object with :class:`DatetimeTZDtype` when using a value with a non-matching timezone, the value will be cast to the object's timezone instead of casting both to object-dtype (:issue:`44243`)
 - Changed behavior of :class:`Index`, :class:`Series`, :class:`DataFrame` constructors with floating-dtype data and a :class:`DatetimeTZDtype`, the data are now interpreted as UTC-times instead of wall-times, consistent with how integer-dtype data are treated (:issue:`45573`)
+- Changed behavior of :class:`Series` and :class:`DataFrame` constructors with integer dtype and floating-point data containing ``NaN``; this now raises ``IntCastingNaNError`` instead of silently ignoring the passed dtype (:issue:`40110`)
 - Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`)
 - Changed behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and an incompatible ``fill_value``; this now casts to ``object`` dtype instead of raising, consistent with the behavior with other dtypes (:issue:`45746`)
 - Change the default argument of ``regex`` for :meth:`Series.str.replace` from ``True`` to ``False``. Additionally, a single character ``pat`` with ``regex=True`` is now treated as a regular expression instead of a string literal. (:issue:`36695`, :issue:`24804`)

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -14,7 +14,6 @@
     cast,
     overload,
 )
-import warnings
 
 import numpy as np
 from numpy import ma
@@ -29,7 +28,6 @@
     T,
 )
 from pandas.errors import IntCastingNaNError
-from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.base import (
     ExtensionDtype,
@@ -577,16 +575,7 @@ def sanitize_array(
                     subarr = maybe_cast_to_integer_array(data, dtype)
 
             except IntCastingNaNError:
-                warnings.warn(
-                    "In a future version, passing float-dtype values containing NaN "
-                    "and an integer dtype will raise IntCastingNaNError "
-                    "(subclass of ValueError) instead of silently ignoring the "
-                    "passed dtype. To retain the old behavior, call Series(arr) or "
-                    "DataFrame(arr) without passing a dtype.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                subarr = np.array(data, copy=copy)
+                raise
             except ValueError:
                 # Pre-2.0, we would have different behavior for Series vs DataFrame.
                 #  DataFrame would call np.array(data, dtype=dtype, copy=copy),

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -18,6 +18,7 @@
 import pytest
 import pytz
 
+from pandas.errors import IntCastingNaNError
 import pandas.util._test_decorators as td
 
 from pandas.core.dtypes.common import is_integer_dtype
@@ -105,16 +106,13 @@ def test_constructor_dict_with_tzaware_scalar(self):
     def test_construct_ndarray_with_nas_and_int_dtype(self):
         # GH#26919 match Series by not casting np.nan to meaningless int
         arr = np.array([[1, np.nan], [2, 3]])
-        with tm.assert_produces_warning(FutureWarning):
-            df = DataFrame(arr, dtype="i8")
-        assert df.values.dtype == arr.dtype
-        assert isna(df.iloc[0, 1])
+        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
+        with pytest.raises(IntCastingNaNError, match=msg):
+            DataFrame(arr, dtype="i8")
 
         # check this matches Series behavior
-        with tm.assert_produces_warning(FutureWarning):
-            ser = Series(arr[0], dtype="i8", name=0)
-        expected = df.iloc[0]
-        tm.assert_series_equal(ser, expected)
+        with pytest.raises(IntCastingNaNError, match=msg):
+            Series(arr[0], dtype="i8", name=0)
 
     def test_construct_from_list_of_datetimes(self):
         df = DataFrame([datetime.now(), datetime.now()])
@@ -966,21 +964,16 @@ def _check_basic_constructor(self, empty):
         assert len(frame.index) == 3
         assert len(frame.columns) == 1
 
-        warn = None if empty is np.ones else FutureWarning
-        with tm.assert_produces_warning(warn):
+        if empty is not np.ones:
+            msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
+            with pytest.raises(IntCastingNaNError, match=msg):
+                DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64)
+            return
+        else:
             frame = DataFrame(
                 mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64
             )
-        if empty is np.ones:
-            # passing dtype casts
             assert frame.values.dtype == np.int64
-        else:
-            # i.e. ma.masked_all
-            # Since we have NaNs, refuse to cast to int dtype, which would take NaN
-            #  to meaningless integers.  This matches Series behavior.  GH#26919
-            assert frame.isna().all().all()
-            assert frame.values.dtype == np.float64
-            assert isna(frame.values).all()
 
         # wrong size axis labels
         msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)"
@@ -1741,11 +1734,10 @@ def test_constructor_mix_series_nonseries(self, float_frame):
             DataFrame({"A": float_frame["A"], "B": list(float_frame["B"])[:-2]})
 
     def test_constructor_miscast_na_int_dtype(self):
-        msg = "float-dtype values containing NaN and an integer dtype"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64)
-        expected = DataFrame([[np.nan, 1], [1, 0]])
-        tm.assert_frame_equal(df, expected)
+        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
+
+        with pytest.raises(IntCastingNaNError, match=msg):
+            DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64)
 
     def test_constructor_column_duplicates(self):
         # it works! #2079
@@ -2722,16 +2714,16 @@ def test_floating_values_integer_dtype(self):
 
         # with NaNs, we go through a different path with a different warning
         arr[0, 0] = np.nan
-        msg = "passing float-dtype values containing NaN"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
+        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
+        with pytest.raises(IntCastingNaNError, match=msg):
             DataFrame(arr, dtype="i8")
-        with tm.assert_produces_warning(FutureWarning, match=msg):
+        with pytest.raises(IntCastingNaNError, match=msg):
             Series(arr[0], dtype="i8")
         # The future (raising) behavior matches what we would get via astype:
         msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
-        with pytest.raises(ValueError, match=msg):
+        with pytest.raises(IntCastingNaNError, match=msg):
             DataFrame(arr).astype("i8")
-        with pytest.raises(ValueError, match=msg):
+        with pytest.raises(IntCastingNaNError, match=msg):
             Series(arr[0]).astype("i8")
 
 

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -15,6 +15,7 @@
     lib,
 )
 from pandas.compat import is_numpy_dev
+from pandas.errors import IntCastingNaNError
 import pandas.util._test_decorators as td
 
 from pandas.core.dtypes.common import (
@@ -670,10 +671,9 @@ def test_constructor_sanitize(self):
         s = Series(np.array([1.0, 1.0, 8.0]), dtype="i8")
         assert s.dtype == np.dtype("i8")
 
-        msg = "float-dtype values containing NaN and an integer dtype"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            ser = Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8")
-        assert ser.dtype == np.dtype("f8")
+        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
+        with pytest.raises(IntCastingNaNError, match=msg):
+            Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8")
 
     def test_constructor_copy(self):
         # GH15125
@@ -809,18 +809,17 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
         res = frame_or_series(list(arr), dtype="i8")
         tm.assert_equal(res, expected)
 
-        # When we have NaNs, we silently ignore the integer dtype
+        # pre-2.0, when we had NaNs, we silently ignored the integer dtype
         arr[0] = np.nan
         expected = frame_or_series(arr)
-        msg = "passing float-dtype values containing NaN and an integer dtype"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            obj = frame_or_series(arr, dtype="i8")
-        tm.assert_equal(obj, expected)
 
-        with tm.assert_produces_warning(FutureWarning, match=msg):
+        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
+        with pytest.raises(IntCastingNaNError, match=msg):
+            frame_or_series(arr, dtype="i8")
+
+        with pytest.raises(IntCastingNaNError, match=msg):
             # same behavior if we pass list instead of the ndarray
-            obj = frame_or_series(list(arr), dtype="i8")
-        tm.assert_equal(obj, expected)
+            frame_or_series(list(arr), dtype="i8")
 
         # float array that can be losslessly cast to integers
         arr = np.array([1.0, 2.0], dtype="float64")
@@ -854,13 +853,13 @@ def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtyp
         # Updated: make sure we treat this list the same as we would treat the
         # equivalent ndarray
         vals = [1, 2, np.nan]
-        msg = "In a future version, passing float-dtype values containing NaN"
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            res = Series(vals, dtype=any_int_numpy_dtype)
-        with tm.assert_produces_warning(FutureWarning, match=msg):
-            expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
-        tm.assert_series_equal(res, expected)
-        assert np.isnan(expected.iloc[-1])
+        # pre-2.0 this would return with a float dtype, in 2.0 we raise
+
+        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
+        with pytest.raises(IntCastingNaNError, match=msg):
+            Series(vals, dtype=any_int_numpy_dtype)
+        with pytest.raises(IntCastingNaNError, match=msg):
+            Series(np.array(vals), dtype=any_int_numpy_dtype)
 
     def test_constructor_dtype_no_cast(self):
         # see gh-1572

diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
@@ -8,6 +8,7 @@
 import numpy as np
 import pytest
 
+from pandas.errors import IntCastingNaNError
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -100,13 +101,13 @@ def test_construct_dask_float_array_int_dtype_match_ndarray():
     expected = Series(arr, dtype="i8")
     tm.assert_series_equal(res, expected)
 
-    msg = "In a future version, passing float-dtype values containing NaN"
+    msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
     arr[2] = np.nan
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        res = Series(darr, dtype="i8")
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        expected = Series(arr, dtype="i8")
-    tm.assert_series_equal(res, expected)
+    with pytest.raises(IntCastingNaNError, match=msg):
+        Series(darr, dtype="i8")
+    # which is the same as we get with a numpy input
+    with pytest.raises(IntCastingNaNError, match=msg):
+        Series(arr, dtype="i8")
 
 
 def test_xarray(df):