Backport PR #48254 on branch 1.5.x (REF: avoid FutureWarning about us…

…ing deprecates loc.__setitem__ non-inplace usage) (#48353) Backport PR #48254: REF: avoid FutureWarning about using deprecates loc.__setitem__ non-inplace usage Co-authored-by: jbrockmendel <jbrockmendel@gmail.com>
pandas-dev · Sep 2, 2022 · f0b0630 · f0b0630
1 parent 5987b63
commit f0b0630
Show file tree

Hide file tree

Showing 2 changed files with 66 additions and 9 deletions.
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6869,14 +6869,48 @@ def fillna(
                         if not is_dict
                         else downcast.get(k)  # type: ignore[union-attr]
                     )
-                    # GH47649
-                    result.loc[:, k] = (
-                        result[k].fillna(v, limit=limit, downcast=downcast_k).values
-                    )
-                    # TODO: result.loc[:, k] = result.loc[:, k].fillna(
-                    #     v, limit=limit, downcast=downcast_k
-                    # )
-                    # Revert when GH45751 is fixed
+
+                    res_k = result[k].fillna(v, limit=limit, downcast=downcast_k)
+
+                    if not inplace:
+                        result[k] = res_k
+                    else:
+                        # We can write into our existing column(s) iff dtype
+                        #  was preserved.
+                        if isinstance(res_k, ABCSeries):
+                            # i.e. 'k' only shows up once in self.columns
+                            if res_k.dtype == result[k].dtype:
+                                result.loc[:, k] = res_k
+                            else:
+                                # Different dtype -> no way to do inplace.
+                                result[k] = res_k
+                        else:
+                            # see test_fillna_dict_inplace_nonunique_columns
+                            locs = result.columns.get_loc(k)
+                            if isinstance(locs, slice):
+                                locs = np.arange(self.shape[1])[locs]
+                            elif (
+                                isinstance(locs, np.ndarray) and locs.dtype.kind == "b"
+                            ):
+                                locs = locs.nonzero()[0]
+                            elif not (
+                                isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
+                            ):
+                                # Should never be reached, but let's cover our bases
+                                raise NotImplementedError(
+                                    "Unexpected get_loc result, please report a bug at "
+                                    "https://github.com/pandas-dev/pandas"
+                                )
+
+                            for i, loc in enumerate(locs):
+                                res_loc = res_k.iloc[:, i]
+                                target = self.iloc[:, loc]
+
+                                if res_loc.dtype == target.dtype:
+                                    result.iloc[:, loc] = res_loc
+                                else:
+                                    result.isetitem(loc, res_loc)
+
                 return result if not inplace else None
 
             elif not is_list_like(value):

diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
@@ -19,6 +19,30 @@
 
 
 class TestFillNA:
+    @td.skip_array_manager_not_yet_implemented
+    def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write):
+        df = DataFrame(
+            {"A": [np.nan] * 3, "B": [NaT, Timestamp(1), NaT], "C": [np.nan, "foo", 2]}
+        )
+        df.columns = ["A", "A", "A"]
+        orig = df[:]
+
+        df.fillna({"A": 2}, inplace=True)
+        # The first and third columns can be set inplace, while the second cannot.
+
+        expected = DataFrame(
+            {"A": [2.0] * 3, "B": [2, Timestamp(1), 2], "C": [2, "foo", 2]}
+        )
+        expected.columns = ["A", "A", "A"]
+        tm.assert_frame_equal(df, expected)
+
+        # TODO: what's the expected/desired behavior with CoW?
+        if not using_copy_on_write:
+            assert tm.shares_memory(df.iloc[:, 0], orig.iloc[:, 0])
+        assert not tm.shares_memory(df.iloc[:, 1], orig.iloc[:, 1])
+        if not using_copy_on_write:
+            assert tm.shares_memory(df.iloc[:, 2], orig.iloc[:, 2])
+
     @td.skip_array_manager_not_yet_implemented
     def test_fillna_on_column_view(self, using_copy_on_write):
         # GH#46149 avoid unnecessary copies
@@ -287,7 +311,6 @@ def test_fillna_downcast_noop(self, frame_or_series):
         res3 = obj2.fillna("foo", downcast=np.dtype(np.int32))
         tm.assert_equal(res3, expected)
 
-    @td.skip_array_manager_not_yet_implemented
     @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]])
     def test_fillna_dictlike_value_duplicate_colnames(self, columns):
         # GH#43476