From 04a130f5c86062a15c2dd08e52df34ed3e5adac5 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 26 Aug 2022 11:07:26 +0200
Subject: [PATCH] Backport PR #48215: REGR: properly update DataFrame cache in Series.__setitem__

---
 doc/source/whatsnew/v1.4.4.rst              |  1 +
 pandas/core/series.py                       |  2 +-
 pandas/tests/frame/indexing/test_setitem.py | 18 ++++++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst
index deff6e194c3bd..e03e6cd41ebd3 100644
--- a/doc/source/whatsnew/v1.4.4.rst
+++ b/doc/source/whatsnew/v1.4.4.rst
@@ -26,6 +26,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`)
 - Fixed regression when slicing with :meth:`DataFrame.loc` with :class:`DateOffset`-index (:issue:`46671`)
 - Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
+- Fixed regression in updating a DataFrame column through Series ``__setitem__`` (using chained assignment) not updating column values inplace and using too much memory (:issue:`47172`)
 - Fixed regression in :meth:`DataFrame.select_dtypes` returning a view on the original DataFrame (:issue:`48090`)
 - Fixed regression using custom Index subclasses (for example, used in xarray) with :meth:`~DataFrame.reset_index` or :meth:`Index.insert` (:issue:`47071`)
 - Fixed regression in :meth:`DatetimeIndex.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 43ad67d36ad4b..d766d846959e2 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1140,7 +1140,7 @@ def __setitem__(self, key, value) -> None:
                 self._set_with(key, value)
 
         if cacher_needs_updating:
-            self._maybe_update_cacher()
+            self._maybe_update_cacher(inplace=True)
 
     def _set_with_engine(self, key, value) -> None:
         loc = self.index.get_loc(key)
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
index c438dc78ce397..47315f0c96e9e 100644
--- a/pandas/tests/frame/indexing/test_setitem.py
+++ b/pandas/tests/frame/indexing/test_setitem.py
@@ -1174,3 +1174,21 @@ def test_setitem_not_operating_inplace(self, value, set_value, indexer):
         view = df[:]
         df[indexer] = set_value
         tm.assert_frame_equal(view, expected)
+
+    @td.skip_array_manager_invalid_test
+    def test_setitem_column_update_inplace(self, using_copy_on_write):
+        # https://github.com/pandas-dev/pandas/issues/47172
+
+        labels = [f"c{i}" for i in range(10)]
+        df = DataFrame({col: np.zeros(len(labels)) for col in labels}, index=labels)
+        values = df._mgr.blocks[0].values
+
+        for label in df.columns:
+            df[label][label] = 1
+
+        if not using_copy_on_write:
+            # diagonal values all updated
+            assert np.all(values[np.arange(10), np.arange(10)] == 1)
+        else:
+            # original dataframe not updated
+            assert np.all(values[np.arange(10), np.arange(10)] == 0)