From f0b0630e0c843caa55ce84e9b6c0234893fffddb Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 2 Sep 2022 18:21:43 +0200 Subject: [PATCH] Backport PR #48254 on branch 1.5.x (REF: avoid FutureWarning about using deprecates loc.__setitem__ non-inplace usage) (#48353) Backport PR #48254: REF: avoid FutureWarning about using deprecates loc.__setitem__ non-inplace usage Co-authored-by: jbrockmendel --- pandas/core/generic.py | 50 +++++++++++++++++++---- pandas/tests/frame/methods/test_fillna.py | 25 +++++++++++- 2 files changed, 66 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index abab32ae145bd..7b345a58bda88 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6869,14 +6869,48 @@ def fillna( if not is_dict else downcast.get(k) # type: ignore[union-attr] ) - # GH47649 - result.loc[:, k] = ( - result[k].fillna(v, limit=limit, downcast=downcast_k).values - ) - # TODO: result.loc[:, k] = result.loc[:, k].fillna( - # v, limit=limit, downcast=downcast_k - # ) - # Revert when GH45751 is fixed + + res_k = result[k].fillna(v, limit=limit, downcast=downcast_k) + + if not inplace: + result[k] = res_k + else: + # We can write into our existing column(s) iff dtype + # was preserved. + if isinstance(res_k, ABCSeries): + # i.e. 'k' only shows up once in self.columns + if res_k.dtype == result[k].dtype: + result.loc[:, k] = res_k + else: + # Different dtype -> no way to do inplace. + result[k] = res_k + else: + # see test_fillna_dict_inplace_nonunique_columns + locs = result.columns.get_loc(k) + if isinstance(locs, slice): + locs = np.arange(self.shape[1])[locs] + elif ( + isinstance(locs, np.ndarray) and locs.dtype.kind == "b" + ): + locs = locs.nonzero()[0] + elif not ( + isinstance(locs, np.ndarray) and locs.dtype.kind == "i" + ): + # Should never be reached, but let's cover our bases + raise NotImplementedError( + "Unexpected get_loc result, please report a bug at " + "https://github.com/pandas-dev/pandas" + ) + + for i, loc in enumerate(locs): + res_loc = res_k.iloc[:, i] + target = self.iloc[:, loc] + + if res_loc.dtype == target.dtype: + result.iloc[:, loc] = res_loc + else: + result.isetitem(loc, res_loc) + return result if not inplace else None elif not is_list_like(value): diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 8355502c47c61..4cf6706707569 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -19,6 +19,30 @@ class TestFillNA: + @td.skip_array_manager_not_yet_implemented + def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write): + df = DataFrame( + {"A": [np.nan] * 3, "B": [NaT, Timestamp(1), NaT], "C": [np.nan, "foo", 2]} + ) + df.columns = ["A", "A", "A"] + orig = df[:] + + df.fillna({"A": 2}, inplace=True) + # The first and third columns can be set inplace, while the second cannot. + + expected = DataFrame( + {"A": [2.0] * 3, "B": [2, Timestamp(1), 2], "C": [2, "foo", 2]} + ) + expected.columns = ["A", "A", "A"] + tm.assert_frame_equal(df, expected) + + # TODO: what's the expected/desired behavior with CoW? + if not using_copy_on_write: + assert tm.shares_memory(df.iloc[:, 0], orig.iloc[:, 0]) + assert not tm.shares_memory(df.iloc[:, 1], orig.iloc[:, 1]) + if not using_copy_on_write: + assert tm.shares_memory(df.iloc[:, 2], orig.iloc[:, 2]) + @td.skip_array_manager_not_yet_implemented def test_fillna_on_column_view(self, using_copy_on_write): # GH#46149 avoid unnecessary copies @@ -287,7 +311,6 @@ def test_fillna_downcast_noop(self, frame_or_series): res3 = obj2.fillna("foo", downcast=np.dtype(np.int32)) tm.assert_equal(res3, expected) - @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]]) def test_fillna_dictlike_value_duplicate_colnames(self, columns): # GH#43476