-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
API/BUG: Raise when int-dtype coercions fail #21456
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
is_dtype_equal, | ||
is_float_dtype, is_complex_dtype, | ||
is_integer_dtype, | ||
is_unsigned_integer_dtype, | ||
is_datetime_or_timedelta_dtype, | ||
is_bool_dtype, is_scalar, | ||
is_string_dtype, _string_dtypes, | ||
|
@@ -1269,3 +1270,74 @@ def construct_1d_ndarray_preserving_na(values, dtype=None, copy=False): | |
subarr = subarr2 | ||
|
||
return subarr | ||
|
||
|
||
def maybe_cast_to_integer_array(arr, dtype, copy=False):
    """
    Takes any dtype and returns the casted version, raising for when data is
    incompatible with integer/unsigned integer dtypes.

    The data is cast first and the result is compared with the input: if
    both hold the same values the cast was lossless and the result is
    returned, otherwise the cause of the mismatch is diagnosed and raised.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    arr : array-like
        The array to cast.
    dtype : str, np.dtype
        The integer dtype to cast the array to.
    copy : bool, default False
        Whether to make a copy of the array before returning.

    Returns
    -------
    int_arr : ndarray
        An array of integer or unsigned integer dtype

    Raises
    ------
    OverflowError : the dtype is incompatible with the data
    ValueError : loss of precision has occurred during casting

    Examples
    --------
    If you try to coerce negative values to unsigned integers, it raises:

    >>> Series([-1], dtype="uint64")
    Traceback (most recent call last):
        ...
    OverflowError: Trying to coerce negative values to unsigned integers

    Also, if you try to coerce float values to integers, it raises:

    >>> Series([1, 2, 3.5], dtype="int64")
    Traceback (most recent call last):
        ...
    ValueError: Trying to coerce float values to integers
    """

    overflow_msg = ("The elements provided in the data cannot all be "
                    "casted to the dtype {dtype}".format(dtype=dtype))

    try:
        if hasattr(arr, "astype"):
            casted = arr.astype(dtype, copy=copy)
        elif copy:
            casted = np.array(arr, dtype=dtype, copy=True)
        else:
            # Copy only if needed. `np.array(..., copy=False)` is not used
            # because NumPy >= 2 raises when a copy cannot be avoided,
            # which is always the case for e.g. a plain list.
            casted = np.asarray(arr, dtype=dtype)
    except OverflowError:
        raise OverflowError(overflow_msg)

    # Equal values before and after means nothing was lost in the cast.
    if np.array_equal(arr, casted):
        return casted

    # We do this casting to allow for proper
    # data and dtype checking.
    #
    # We didn't do this earlier because NumPy
    # doesn't handle `uint64` correctly.
    #
    # It is still required at this point: `arr` is not necessarily an
    # ndarray, Series or Index (see the `hasattr` branch above), and a
    # list supports neither `arr < 0` nor the dtype checks below.
    arr = np.asarray(arr)

    if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
        raise OverflowError("Trying to coerce negative values "
                            "to unsigned integers")

    if is_integer_dtype(dtype) and (is_float_dtype(arr) or
                                    is_object_dtype(arr)):
        raise ValueError("Trying to coerce float values to integers")

    # The cast changed the values for some other reason, e.g. integers
    # that silently wrapped around in a narrower dtype. Raise rather than
    # fall off the end and implicitly return None.
    raise OverflowError(overflow_msg)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -542,12 +542,30 @@ def test_constructor_pass_nan_nat(self): | |
tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp) | ||
|
||
def test_constructor_cast(self):
    # Strings that cannot be parsed as floats must be rejected, and the
    # error message must name the failed conversion. Matching on the
    # message already implies that ValueError is raised, so no separate
    # bare exception-type check is needed.
    msg = "could not convert string to float"
    with tm.assert_raises_regex(ValueError, msg):
        Series(["a", "b", "c"], dtype=float)
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we have these tests for Index as well (I think we do)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 😮 : That's a bug! >>> Index(["a", "b", "c"], dtype=float)
Index(['a', 'b', 'c'], dtype='object') There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See #21311 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jschendel @jreback : I'll add an |
||
def test_constructor_unsigned_dtype_overflow(self, uint_dtype):
    # see gh-15832
    # Building a Series of an unsigned dtype from a negative value
    # must raise instead of silently producing a different number.
    expected_msg = 'Trying to coerce negative values to unsigned integers'

    with tm.assert_raises_regex(OverflowError, expected_msg):
        Series([-1], dtype=uint_dtype)
|
||
def test_constructor_coerce_float_fail(self, any_int_dtype):
    # see gh-15832
    # A non-integral float (3.5) cannot be stored in an integer dtype
    # without losing data, so construction must raise.
    expected_msg = "Trying to coerce float values to integers"

    with tm.assert_raises_regex(ValueError, expected_msg):
        Series([1, 2, 3.5], dtype=any_int_dtype)
|
||
def test_constructor_coerce_float_valid(self, float_dtype):
    # Requesting a float dtype for float data must succeed and give the
    # same result as constructing first and casting with `astype`.
    data = [1, 2, 3.5]
    result = Series(data, dtype=float_dtype)
    expected = Series(data).astype(float_dtype)
    assert_series_equal(result, expected)
|
||
def test_constructor_dtype_nocast(self): | ||
# 1572 | ||
def test_constructor_dtype_no_cast(self): | ||
# see gh-1572 | ||
s = Series([1, 2, 3]) | ||
|
||
s2 = Series(s, dtype=np.int64) | ||
|
||
s2[1] = 5 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This duplicates `maybe_downcast_to_dtype`, which is used internally; I'd rather have that function's docstring and examples updated (the `copy=` parameter can be added there too).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not quite. We're not always down-casting e.g.
Silly? Yes, but it should work.