Skip to content

Commit

Permalink
BUG: Series(floatlike, dtype=intlike) inconsistent with non-ndarray data (#45142)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Jan 10, 2022
1 parent b17cedd commit ad9d42a
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 10 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ Numeric

Conversion
^^^^^^^^^^
-
- Bug in constructing a :class:`Series` with an integer dtype from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``), which raised instead of casting the way it does for an ``np.ndarray`` (:issue:`40110`)
-

Strings
Expand Down
18 changes: 17 additions & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,23 @@ def sanitize_array(
data = list(data)

if dtype is not None or len(data) == 0:
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
try:
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
except ValueError:
casted = np.array(data, copy=False)
if casted.dtype.kind == "f" and is_integer_dtype(dtype):
# GH#40110 match the behavior we have if we passed
# a ndarray[float] to begin with
return sanitize_array(
casted,
index,
dtype,
copy=False,
raise_cast_failure=raise_cast_failure,
allow_2d=allow_2d,
)
else:
raise
else:
subarr = maybe_convert_platform(data)
if subarr.dtype == object:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1666,7 +1666,7 @@ def maybe_cast_to_integer_array(
Also, if you try to coerce float values to integers, it raises:
>>> pd.Series([1, 2, 3.5], dtype="int64")
>>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64"))
Traceback (most recent call last):
...
ValueError: Trying to coerce float values to integers
Expand Down
58 changes: 51 additions & 7 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,23 +754,67 @@ def test_constructor_unsigned_dtype_overflow(self, any_unsigned_int_numpy_dtype)
with pytest.raises(OverflowError, match=msg):
Series([-1], dtype=any_unsigned_int_numpy_dtype)

# Presumably skips this test when dask is not importable (td.skip_if_no) -- confirm.
@td.skip_if_no("dask")
def test_construct_dask_float_array_int_dtype_match_ndarray(self):
# GH#40110 make sure we treat a float-dtype dask array with the same
# rules we would for an ndarray
# Imported inside the test so that dask is only needed when the test runs.
import dask.dataframe as dd

# The same float values are held twice: as a plain ndarray and wrapped
# through dask. Every check below compares the two constructions.
arr = np.array([1, 2.5, 3])
darr = dd.from_array(arr)

# Case 1: no dtype requested.
res = Series(darr)
expected = Series(arr)
tm.assert_series_equal(res, expected)

# Case 2: integer dtype requested for values that include 2.5.
res = Series(darr, dtype="i8")
expected = Series(arr, dtype="i8")
tm.assert_series_equal(res, expected)

# Case 3: integer dtype requested for values that include NaN; both
# constructions are expected to emit the same FutureWarning and to agree.
msg = "In a future version, passing float-dtype values containing NaN"
# NOTE(review): arr is mutated after darr was created; the assertions below
# rely on darr observing this change -- presumably dd.from_array keeps a
# reference to arr rather than copying it. TODO confirm.
arr[2] = np.nan
with tm.assert_produces_warning(FutureWarning, match=msg):
res = Series(darr, dtype="i8")
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = Series(arr, dtype="i8")
tm.assert_series_equal(res, expected)

def test_constructor_coerce_float_fail(self, any_int_numpy_dtype):
# see gh-15832
msg = "Trying to coerce float values to integers"
with pytest.raises(ValueError, match=msg):
Series([1, 2, 3.5], dtype=any_int_numpy_dtype)
# Updated: make sure we treat this list the same as we would treat
# the equivalent ndarray
vals = [1, 2, 3.5]

res = Series(vals, dtype=any_int_numpy_dtype)
expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
tm.assert_series_equal(res, expected)
alt = Series(np.array(vals)) # i.e. we ignore the dtype kwd
tm.assert_series_equal(alt, expected)

def test_constructor_coerce_float_valid(self, float_numpy_dtype):
# Passing a float dtype straight to the constructor should give the same
# Series as constructing without a dtype and casting afterwards with astype.
# float_numpy_dtype is supplied from outside this block (presumably a
# parametrized pytest fixture -- confirm).
s = Series([1, 2, 3.5], dtype=float_numpy_dtype)
expected = Series([1, 2, 3.5]).astype(float_numpy_dtype)
tm.assert_series_equal(s, expected)

def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtype):
def test_constructor_invalid_coerce_ints_with_float_nan(
self, any_int_numpy_dtype, request
):
# GH 22585
# Updated: make sure we treat this list the same as we would treat the
# equivalent ndarray
if np_version_under1p19 and np.dtype(any_int_numpy_dtype).kind == "u":
mark = pytest.mark.xfail(reason="Produces an extra RuntimeWarning")
request.node.add_marker(mark)

msg = "cannot convert float NaN to integer"
with pytest.raises(ValueError, match=msg):
Series([1, 2, np.nan], dtype=any_int_numpy_dtype)
vals = [1, 2, np.nan]

msg = "In a future version, passing float-dtype values containing NaN"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = Series(vals, dtype=any_int_numpy_dtype)
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
tm.assert_series_equal(res, expected)
assert np.isnan(expected.iloc[-1])

def test_constructor_dtype_no_cast(self):
# see gh-1572
Expand Down

0 comments on commit ad9d42a

Please sign in to comment.