From ad9d42a4c847eb9f341dd6743466a4bed70a0a6e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 10 Jan 2022 05:17:35 -0800 Subject: [PATCH] BUG: Series(floatlike, dtype=intlike) inconsistent with non-ndarray data (#45142) --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/construction.py | 18 +++++++- pandas/core/dtypes/cast.py | 2 +- pandas/tests/series/test_constructors.py | 58 +++++++++++++++++++++--- 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e723918ad8b4b..1b28f80df851c 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -137,7 +137,7 @@ Numeric Conversion ^^^^^^^^^^ -- +- Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``) and an integer dtype raising instead of casting like we would with an ``np.ndarray`` (:issue:`40110`) - Strings diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e496125683c09..a0db2c2157f8f 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -588,7 +588,23 @@ def sanitize_array( data = list(data) if dtype is not None or len(data) == 0: - subarr = _try_cast(data, dtype, copy, raise_cast_failure) + try: + subarr = _try_cast(data, dtype, copy, raise_cast_failure) + except ValueError: + casted = np.array(data, copy=False) + if casted.dtype.kind == "f" and is_integer_dtype(dtype): + # GH#40110 match the behavior we have if we passed + # a ndarray[float] to begin with + return sanitize_array( + casted, + index, + dtype, + copy=False, + raise_cast_failure=raise_cast_failure, + allow_2d=allow_2d, + ) + else: + raise else: subarr = maybe_convert_platform(data) if subarr.dtype == object: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 80271f04d4449..dfa4819fbba5b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1666,7 +1666,7 @@ def maybe_cast_to_integer_array( Also, if you try to coerce float values to integers, it raises: - >>> pd.Series([1, 2, 3.5], dtype="int64") + >>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64")) Traceback (most recent call last): ... ValueError: Trying to coerce float values to integers diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 7300b8f03ade6..2411b47e9dd7e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -754,23 +754,67 @@ def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype) with pytest.raises(OverflowError, match=msg): Series([-1], dtype=any_unsigned_int_numpy_dtype) + @td.skip_if_no("dask") + def test_construct_dask_float_array_int_dtype_match_ndarray(self): + # GH#40110 make sure we treat a float-dtype dask array with the same + # rules we would for an ndarray + import dask.dataframe as dd + + arr = np.array([1, 2.5, 3]) + darr = dd.from_array(arr) + + res = Series(darr) + expected = Series(arr) + tm.assert_series_equal(res, expected) + + res = Series(darr, dtype="i8") + expected = Series(arr, dtype="i8") + tm.assert_series_equal(res, expected) + + msg = "In a future version, passing float-dtype values containing NaN" + arr[2] = np.nan + with tm.assert_produces_warning(FutureWarning, match=msg): + res = Series(darr, dtype="i8") + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = Series(arr, dtype="i8") + tm.assert_series_equal(res, expected) + def test_constructor_coerce_float_fail(self, any_int_numpy_dtype): # see gh-15832 - msg = "Trying to coerce float values to integers" - with pytest.raises(ValueError, match=msg): - Series([1, 2, 3.5], dtype=any_int_numpy_dtype) + # Updated: make sure we treat this list the same as we would treat + # the equivalent ndarray + vals = [1, 2, 3.5] + + res = Series(vals, dtype=any_int_numpy_dtype) + expected = Series(np.array(vals), dtype=any_int_numpy_dtype) + tm.assert_series_equal(res, expected) + alt = Series(np.array(vals)) # i.e. we ignore the dtype kwd + tm.assert_series_equal(alt, expected) def test_constructor_coerce_float_valid(self, float_numpy_dtype): s = Series([1, 2, 3.5], dtype=float_numpy_dtype) expected = Series([1, 2, 3.5]).astype(float_numpy_dtype) tm.assert_series_equal(s, expected) - def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtype): + def test_constructor_invalid_coerce_ints_with_float_nan( + self, any_int_numpy_dtype, request + ): # GH 22585 + # Updated: make sure we treat this list the same as we would treat the + # equivalent ndarray + if np_version_under1p19 and np.dtype(any_int_numpy_dtype).kind == "u": + mark = pytest.mark.xfail(reason="Produces an extra RuntimeWarning") + request.node.add_marker(mark) - msg = "cannot convert float NaN to integer" - with pytest.raises(ValueError, match=msg): - Series([1, 2, np.nan], dtype=any_int_numpy_dtype) + vals = [1, 2, np.nan] + + msg = "In a future version, passing float-dtype values containing NaN" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = Series(vals, dtype=any_int_numpy_dtype) + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = Series(np.array(vals), dtype=any_int_numpy_dtype) + tm.assert_series_equal(res, expected) + assert np.isnan(expected.iloc[-1]) def test_constructor_dtype_no_cast(self): # see gh-1572