REF: simplify NDFrame.replace, ObjectBlock.replace (#37704)

pandas-dev · Nov 9, 2020 · 19d6a61 · 19d6a61
1 parent 0686736
commit 19d6a61
Show file tree

Hide file tree

Showing 3 changed files with 37 additions and 48 deletions.
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6744,25 +6744,25 @@ def replace(
                 else:
                     raise TypeError("value argument must be scalar, dict, or Series")
 
-            elif is_list_like(to_replace):  # [NA, ''] -> [0, 'missing']
-                if is_list_like(value):
-                    if len(to_replace) != len(value):
-                        raise ValueError(
-                            f"Replacement lists must match in length. "
-                            f"Expecting {len(to_replace)} got {len(value)} "
-                        )
-                    self._consolidate_inplace()
-                    new_data = self._mgr.replace_list(
-                        src_list=to_replace,
-                        dest_list=value,
-                        inplace=inplace,
-                        regex=regex,
+            elif is_list_like(to_replace):
+                if not is_list_like(value):
+                    # e.g. to_replace = [NA, ''] and value is 0,
+                    #  so we replace NA with 0 and then replace '' with 0
+                    value = [value] * len(to_replace)
+
+                # e.g. we have to_replace = [NA, ''] and value = [0, 'missing']
+                if len(to_replace) != len(value):
+                    raise ValueError(
+                        f"Replacement lists must match in length. "
+                        f"Expecting {len(to_replace)} got {len(value)} "
                     )
+                new_data = self._mgr.replace_list(
+                    src_list=to_replace,
+                    dest_list=value,
+                    inplace=inplace,
+                    regex=regex,
+                )
 
-                else:  # [NA, ''] -> 0
-                    new_data = self._mgr.replace(
-                        to_replace=to_replace, value=value, inplace=inplace, regex=regex
-                    )
             elif to_replace is None:
                 if not (
                     is_re_compilable(regex)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -2502,39 +2502,14 @@ def replace(
         inplace: bool = False,
         regex: bool = False,
     ) -> List["Block"]:
-        to_rep_is_list = is_list_like(to_replace)
-        value_is_list = is_list_like(value)
-        both_lists = to_rep_is_list and value_is_list
-        either_list = to_rep_is_list or value_is_list
+        # Note: the checks we do in NDFrame.replace ensure we never get
+        #  here with listlike to_replace or value, as those cases
+        #  go through _replace_list
 
-        result_blocks: List["Block"] = []
-        blocks: List["Block"] = [self]
-
-        if not either_list and is_re(to_replace):
+        if is_re(to_replace) or regex:
             return self._replace_single(to_replace, value, inplace=inplace, regex=True)
-        elif not (either_list or regex):
+        else:
             return super().replace(to_replace, value, inplace=inplace, regex=regex)
-        elif both_lists:
-            for to_rep, v in zip(to_replace, value):
-                result_blocks = []
-                for b in blocks:
-                    result = b._replace_single(to_rep, v, inplace=inplace, regex=regex)
-                    result_blocks.extend(result)
-                blocks = result_blocks
-            return result_blocks
-
-        elif to_rep_is_list and regex:
-            for to_rep in to_replace:
-                result_blocks = []
-                for b in blocks:
-                    result = b._replace_single(
-                        to_rep, value, inplace=inplace, regex=regex
-                    )
-                    result_blocks.extend(result)
-                blocks = result_blocks
-            return result_blocks
-
-        return self._replace_single(to_replace, value, inplace=inplace, regex=regex)
 
     def _replace_single(
         self,
@@ -2627,6 +2602,19 @@ def re_replacer(s):
 class CategoricalBlock(ExtensionBlock):
     __slots__ = ()
 
+    def _replace_list(
+        self,
+        src_list: List[Any],
+        dest_list: List[Any],
+        inplace: bool = False,
+        regex: bool = False,
+    ) -> List["Block"]:
+        if len(algos.unique(dest_list)) == 1:
+            # We likely got here because NDFrame.replace tiled a scalar value
+            #  into a list, so un-tile it and take the single-value replace path
+            return self.replace(src_list, dest_list[0], inplace, regex)
+        return super()._replace_list(src_list, dest_list, inplace, regex)
+
     def replace(
         self,
         to_replace,

diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py
@@ -21,6 +21,7 @@
         ((1, 2, 4), 5, [5, 5, 3], False),
         ((5, 6), 2, [1, 2, 3], False),
         # many-to-many, handled outside of Categorical and results in separate dtype
+        #  except for cases with only 1 unique entry in `value`
         ([1], [2], [2, 2, 3], True),
         ([1, 4], [5, 2], [5, 2, 3], True),
         # check_categorical sorts categories, which crashes on mixed dtypes
@@ -30,7 +31,7 @@
 )
 def test_replace(to_replace, value, expected, flip_categories):
     # GH 31720
-    stays_categorical = not isinstance(value, list)
+    stays_categorical = not isinstance(value, list) or len(pd.unique(value)) == 1
 
     s = pd.Series([1, 2, 3], dtype="category")
     result = s.replace(to_replace, value)