BUG: Series(floatlike, dtype=intlike) inconsistent with non-ndarray data (#45142)
pandas-dev · Jan 10, 2022 · ad9d42a · ad9d42a
1 parent b17cedd
commit ad9d42a
Show file tree

Hide file tree

Showing 4 changed files with 70 additions and 10 deletions.
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -137,7 +137,7 @@ Numeric
 
 Conversion
 ^^^^^^^^^^
--
+- Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``) together with an integer dtype, which raised instead of casting as it would for an ``np.ndarray`` (:issue:`40110`)
 -
 
 Strings

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -588,7 +588,23 @@ def sanitize_array(
             data = list(data)
 
         if dtype is not None or len(data) == 0:
-            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
+            try:
+                subarr = _try_cast(data, dtype, copy, raise_cast_failure)
+            except ValueError:
+                casted = np.array(data, copy=False)
+                if casted.dtype.kind == "f" and is_integer_dtype(dtype):
+                    # GH#40110 match the behavior we have if we passed
+                    #  a ndarray[float] to begin with
+                    return sanitize_array(
+                        casted,
+                        index,
+                        dtype,
+                        copy=False,
+                        raise_cast_failure=raise_cast_failure,
+                        allow_2d=allow_2d,
+                    )
+                else:
+                    raise
         else:
             subarr = maybe_convert_platform(data)
             if subarr.dtype == object:

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1666,7 +1666,7 @@ def maybe_cast_to_integer_array(
 
     Also, if you try to coerce float values to integers, it raises:
 
-    >>> pd.Series([1, 2, 3.5], dtype="int64")
+    >>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64"))
     Traceback (most recent call last):
         ...
     ValueError: Trying to coerce float values to integers

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -754,23 +754,67 @@ def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype)
         with pytest.raises(OverflowError, match=msg):
             Series([-1], dtype=any_unsigned_int_numpy_dtype)
 
+    @td.skip_if_no("dask")
+    def test_construct_dask_float_array_int_dtype_match_ndarray(self):
+        # GH#40110 make sure we treat a float-dtype dask array with the same
+        #  rules we would for an ndarray
+        import dask.dataframe as dd
+
+        arr = np.array([1, 2.5, 3])
+        darr = dd.from_array(arr)
+
+        res = Series(darr)
+        expected = Series(arr)
+        tm.assert_series_equal(res, expected)
+
+        res = Series(darr, dtype="i8")
+        expected = Series(arr, dtype="i8")
+        tm.assert_series_equal(res, expected)
+
+        msg = "In a future version, passing float-dtype values containing NaN"
+        arr[2] = np.nan
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            res = Series(darr, dtype="i8")
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = Series(arr, dtype="i8")
+        tm.assert_series_equal(res, expected)
+
     def test_constructor_coerce_float_fail(self, any_int_numpy_dtype):
         # see gh-15832
-        msg = "Trying to coerce float values to integers"
-        with pytest.raises(ValueError, match=msg):
-            Series([1, 2, 3.5], dtype=any_int_numpy_dtype)
+        # Updated: make sure we treat this list the same as we would treat
+        #  the equivalent ndarray
+        vals = [1, 2, 3.5]
+
+        res = Series(vals, dtype=any_int_numpy_dtype)
+        expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
+        tm.assert_series_equal(res, expected)
+        alt = Series(np.array(vals))  # i.e. we ignore the dtype kwd
+        tm.assert_series_equal(alt, expected)
 
     def test_constructor_coerce_float_valid(self, float_numpy_dtype):
         s = Series([1, 2, 3.5], dtype=float_numpy_dtype)
         expected = Series([1, 2, 3.5]).astype(float_numpy_dtype)
         tm.assert_series_equal(s, expected)
 
-    def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtype):
+    def test_constructor_invalid_coerce_ints_with_float_nan(
+        self, any_int_numpy_dtype, request
+    ):
         # GH 22585
+        # Updated: make sure we treat this list the same as we would treat the
+        #  equivalent ndarray
+        if np_version_under1p19 and np.dtype(any_int_numpy_dtype).kind == "u":
+            mark = pytest.mark.xfail(reason="Produces an extra RuntimeWarning")
+            request.node.add_marker(mark)
 
-        msg = "cannot convert float NaN to integer"
-        with pytest.raises(ValueError, match=msg):
-            Series([1, 2, np.nan], dtype=any_int_numpy_dtype)
+        vals = [1, 2, np.nan]
+
+        msg = "In a future version, passing float-dtype values containing NaN"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            res = Series(vals, dtype=any_int_numpy_dtype)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
+        tm.assert_series_equal(res, expected)
+        assert np.isnan(expected.iloc[-1])
 
     def test_constructor_dtype_no_cast(self):
         # see gh-1572