Skip to content

Commit

Permalink
BUG: make Series handle dtype='int64' for string array (#48333)
Browse files Browse the repository at this point in the history
* make sure conversion is not lossy

* add whatsnew and ref issue in test

* raise earlier

Note the comment at the last line:
# No known cases that get here

* move whatsnew entry to v1.6.0.rst
  • Loading branch information
5j9 authored Sep 12, 2022
1 parent 51396d8 commit 9f9d80f
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 1 deletion.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.6.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ Numeric

Conversion
^^^^^^^^^^
-
- Bug in constructing :class:`Series` with ``int64`` dtype from a string list raising instead of casting (:issue:`44923`)
-

Strings
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1851,6 +1851,11 @@ def maybe_cast_to_integer_array(
# doesn't handle `uint64` correctly.
arr = np.asarray(arr)

if np.issubdtype(arr.dtype, str):
if (casted.astype(str) == arr).all():
return casted
raise ValueError(f"string values cannot be losslessly cast to {dtype}")

if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
raise OverflowError("Trying to coerce negative values to unsigned integers")

Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1881,6 +1881,19 @@ def test_constructor_bool_dtype_missing_values(self):
expected = Series(True, index=[0], dtype="bool")
tm.assert_series_equal(result, expected)

def test_constructor_int64_dtype(self, any_int_dtype):
    """Check that a list of digit strings is cast to the requested dtype.

    A Series built from ``["0", "1", "2"]`` must equal one built from
    ``[0, 1, 2]`` when both are given the same ``dtype``.
    """
    # GH#44923
    # NOTE(review): `any_int_dtype` is presumably a fixture parametrized over
    # integer dtypes -- confirm against conftest.
    from_ints = Series([0, 1, 2], dtype=any_int_dtype)
    from_strings = Series(["0", "1", "2"], dtype=any_int_dtype)
    tm.assert_series_equal(from_strings, from_ints)

def test_constructor_raise_on_lossy_conversion_of_strings(self):
    """Check that a string value that does not fit in int8 raises ValueError.

    ``"128"`` is one past the largest int8 value, so constructing the Series
    with ``dtype="int8"`` is expected to fail with the lossless-cast message.
    """
    # GH#44923
    expected_message = "string values cannot be losslessly cast to int8"
    with pytest.raises(ValueError, match=expected_message):
        Series(["128"], dtype="int8")

def test_constructor_dtype_timedelta_alternative_construct(self):
# GH#35465
result = Series([1000000, 200000, 3000000], dtype="timedelta64[ns]")
Expand Down

0 comments on commit 9f9d80f

Please sign in to comment.