Skip to content

Commit

Permalink
REF: simplify NDFrame.replace, ObjectBlock.replace (#37704)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Nov 9, 2020
1 parent 0686736 commit 19d6a61
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 48 deletions.
34 changes: 17 additions & 17 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6744,25 +6744,25 @@ def replace(
else:
raise TypeError("value argument must be scalar, dict, or Series")

elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing']
if is_list_like(value):
if len(to_replace) != len(value):
raise ValueError(
f"Replacement lists must match in length. "
f"Expecting {len(to_replace)} got {len(value)} "
)
self._consolidate_inplace()
new_data = self._mgr.replace_list(
src_list=to_replace,
dest_list=value,
inplace=inplace,
regex=regex,
elif is_list_like(to_replace):
if not is_list_like(value):
# e.g. to_replace = [NA, ''] and value is 0,
# so we replace NA with 0 and then replace '' with 0
value = [value] * len(to_replace)

# e.g. we have to_replace = [NA, ''] and value = [0, 'missing']
if len(to_replace) != len(value):
raise ValueError(
f"Replacement lists must match in length. "
f"Expecting {len(to_replace)} got {len(value)} "
)
new_data = self._mgr.replace_list(
src_list=to_replace,
dest_list=value,
inplace=inplace,
regex=regex,
)

else: # [NA, ''] -> 0
new_data = self._mgr.replace(
to_replace=to_replace, value=value, inplace=inplace, regex=regex
)
elif to_replace is None:
if not (
is_re_compilable(regex)
Expand Down
48 changes: 18 additions & 30 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2502,39 +2502,14 @@ def replace(
inplace: bool = False,
regex: bool = False,
) -> List["Block"]:
to_rep_is_list = is_list_like(to_replace)
value_is_list = is_list_like(value)
both_lists = to_rep_is_list and value_is_list
either_list = to_rep_is_list or value_is_list
# Note: the checks we do in NDFrame.replace ensure we never get
# here with listlike to_replace or value, as those cases
# go through _replace_list

result_blocks: List["Block"] = []
blocks: List["Block"] = [self]

if not either_list and is_re(to_replace):
if is_re(to_replace) or regex:
return self._replace_single(to_replace, value, inplace=inplace, regex=True)
elif not (either_list or regex):
else:
return super().replace(to_replace, value, inplace=inplace, regex=regex)
elif both_lists:
for to_rep, v in zip(to_replace, value):
result_blocks = []
for b in blocks:
result = b._replace_single(to_rep, v, inplace=inplace, regex=regex)
result_blocks.extend(result)
blocks = result_blocks
return result_blocks

elif to_rep_is_list and regex:
for to_rep in to_replace:
result_blocks = []
for b in blocks:
result = b._replace_single(
to_rep, value, inplace=inplace, regex=regex
)
result_blocks.extend(result)
blocks = result_blocks
return result_blocks

return self._replace_single(to_replace, value, inplace=inplace, regex=regex)

def _replace_single(
self,
Expand Down Expand Up @@ -2627,6 +2602,19 @@ def re_replacer(s):
class CategoricalBlock(ExtensionBlock):
__slots__ = ()

def _replace_list(
    self,
    src_list: List[Any],
    dest_list: List[Any],
    inplace: bool = False,
    regex: bool = False,
) -> List["Block"]:
    """
    Dispatch a list-to-list replacement for this block.

    When ``dest_list`` contains exactly one distinct value, the call is
    forwarded to ``self.replace`` with that single value as a scalar.
    Every other case is delegated to the parent class implementation.
    """
    distinct_targets = algos.unique(dest_list)
    if len(distinct_targets) != 1:
        return super()._replace_list(src_list, dest_list, inplace, regex)

    # A single distinct target most likely means NDFrame.replace tiled a
    # scalar ``value`` out to the length of ``to_replace``; undo that tiling
    # and hand the scalar to ``replace``.
    return self.replace(src_list, dest_list[0], inplace, regex)

def replace(
self,
to_replace,
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/arrays/categorical/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
((1, 2, 4), 5, [5, 5, 3], False),
((5, 6), 2, [1, 2, 3], False),
# many-to-many, handled outside of Categorical and results in separate dtype
# except for cases with only 1 unique entry in `value`
([1], [2], [2, 2, 3], True),
([1, 4], [5, 2], [5, 2, 3], True),
# check_categorical sorts categories, which crashes on mixed dtypes
Expand All @@ -30,7 +31,7 @@
)
def test_replace(to_replace, value, expected, flip_categories):
# GH 31720
stays_categorical = not isinstance(value, list)
stays_categorical = not isinstance(value, list) or len(pd.unique(value)) == 1

s = pd.Series([1, 2, 3], dtype="category")
result = s.replace(to_replace, value)
Expand Down

0 comments on commit 19d6a61

Please sign in to comment.