Skip to content

Commit

Permalink
BUG: Series.setitem losing precision when enlarging (#47342)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Jul 1, 2022
1 parent 55d9dcf commit bd9a6f0
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,7 @@ Indexing
- Bug in :meth:`DataFrame.loc` when setting values to a column and the right-hand side is a dictionary (:issue:`47216`)
- Bug in :meth:`Series.__setitem__` with ``datetime64[ns]`` dtype, an all-``False`` boolean mask, and an incompatible value incorrectly casting to ``object`` instead of retaining ``datetime64[ns]`` dtype (:issue:`45967`)
- Bug in :meth:`Index.__getitem__` raising ``ValueError`` when indexer is from boolean dtype with ``NA`` (:issue:`45806`)
- Bug in :meth:`Series.__setitem__` losing precision when enlarging a :class:`Series` with a scalar (:issue:`32346`)
- Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`)
- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`)
- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
Expand Down
26 changes: 23 additions & 3 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import can_hold_element
from pandas.core.dtypes.cast import (
can_hold_element,
maybe_promote,
)
from pandas.core.dtypes.common import (
is_array_like,
is_bool_dtype,
Expand All @@ -42,7 +45,9 @@
)
from pandas.core.dtypes.missing import (
infer_fill_value,
is_valid_na_for_dtype,
isna,
na_value_for_dtype,
)

from pandas.core import algorithms as algos
Expand Down Expand Up @@ -2087,8 +2092,23 @@ def _setitem_with_indexer_missing(self, indexer, value):
# We get only here with loc, so can hard code
return self._setitem_with_indexer(new_indexer, value, "loc")

# this preserves dtype of the value
new_values = Series([value])._values
# this preserves dtype of the value and of the object
if is_valid_na_for_dtype(value, self.obj.dtype):
value = na_value_for_dtype(self.obj.dtype, compat=False)
new_dtype = maybe_promote(self.obj.dtype, value)[0]
elif isna(value):
new_dtype = None
elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
# We should not cast, if we have object dtype because we can
# set timedeltas into object series
curr_dtype = self.obj.dtype
curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
new_dtype = maybe_promote(curr_dtype, value)[0]
else:
new_dtype = None

new_values = Series([value], dtype=new_dtype)._values

if len(self.obj._values):
# GH#22717 handle casting compatibility that np.concatenate
# does incorrectly
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,33 @@ def test_setitem_not_contained(self, string_series):
expected = concat([string_series, app])
tm.assert_series_equal(ser, expected)

def test_setitem_keep_precision(self, any_numeric_ea_dtype):
    """Check that enlarging a Series with a scalar keeps the original dtype.

    The Series is built with the dtype supplied by the
    ``any_numeric_ea_dtype`` fixture, and the result after assignment must
    still carry exactly that dtype.
    """
    # GH#32346
    result = Series([1, 2], dtype=any_numeric_ea_dtype)
    # Assigning to label 2 grows the two-element Series to three elements.
    result[2] = 10
    tm.assert_series_equal(
        result,
        Series([1, 2, 10], dtype=any_numeric_ea_dtype),
    )

@pytest.mark.parametrize("indexer", [1, 2])
@pytest.mark.parametrize(
    "na, target_na, dtype, target_dtype",
    [
        (NA, NA, "Int64", "Int64"),
        (NA, np.nan, "int64", "float64"),
        (NaT, NaT, "int64", "object"),
        (np.nan, NA, "Int64", "Int64"),
        (np.nan, NA, "Float64", "Float64"),
        (np.nan, np.nan, "int64", "float64"),
    ],
)
def test_setitem_enlarge_with_na(self, na, target_na, dtype, target_dtype, indexer):
    """Check the resulting dtype and NA marker when assigning an NA-like scalar.

    Each case assigns ``na`` into a two-element Series of ``dtype`` and
    expects ``target_na`` stored in a Series of ``target_dtype``.
    ``indexer == 1`` replaces the second element; ``indexer == 2`` appends
    a third one.
    """
    # GH#32346
    result = Series([1, 2], dtype=dtype)
    result[indexer] = na

    if indexer == 1:
        # Existing label: the second value is replaced.
        values = [1, target_na]
    else:
        # New label: the Series is enlarged by one element.
        values = [1, 2, target_na]

    tm.assert_series_equal(result, Series(values, dtype=target_dtype))


def test_setitem_scalar_into_readonly_backing_data():
# GH#14359: test that you cannot mutate a read only buffer
Expand Down

0 comments on commit bd9a6f0

Please sign in to comment.