From 09c3d01f8598cb75772e86f52b01d725ad51c183 Mon Sep 17 00:00:00 2001 From: PetitLepton Date: Thu, 28 Feb 2019 09:50:12 +0800 Subject: [PATCH 1/2] BUG: Fix regression on DataFrame.replace for regex Use re.search instead of re.match when comparing values against a regex pattern, so that the replacement is no longer confined to matches at the beginning of the string but applies to matches anywhere within it. The behaviour is then consistent with versions prior to 0.24.0. One test has been added to cover replacement of a character that is not at the beginning of the string. --- doc/source/whatsnew/v0.24.2.rst | 1 + pandas/core/internals/managers.py | 10 +++++----- pandas/tests/frame/test_replace.py | 7 +++++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index b0f287cf0b9f6..63a83af79246c 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -23,6 +23,7 @@ Fixed Regressions - Fixed regression in :meth:`DataFrame.all` and :meth:`DataFrame.any` where ``bool_only=True`` was ignored (:issue:`25101`) - Fixed issue in ``DataFrame`` construction with passing a mixed list of mixed types could segfault. (:issue:`25075`) - Fixed regression in :meth:`DataFrame.apply` causing ``RecursionError`` when ``dict``-like classes were passed as argument. (:issue:`25196`) +- Fixed regression in :meth:`DataFrame.replace` where ``regex=True`` was only replacing patterns matching the start of the string (:issue:`25259`) .. 
_whatsnew_0242.enhancements: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 38b719db1709f..f3df252561250 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -552,9 +552,9 @@ def comp(s, regex=False): if isna(s): return isna(values) if hasattr(s, 'asm8'): - return _compare_or_regex_match(maybe_convert_objects(values), + return _compare_or_regex_search(maybe_convert_objects(values), getattr(s, 'asm8'), regex) - return _compare_or_regex_match(values, s, regex) + return _compare_or_regex_search(values, s, regex) masks = [comp(s, regex) for i, s in enumerate(src_list)] @@ -1897,11 +1897,11 @@ def _consolidate(blocks): return new_blocks -def _compare_or_regex_match(a, b, regex=False): +def _compare_or_regex_search(a, b, regex=False): """ Compare two array_like inputs of the same shape or two scalar values - Calls operator.eq or re.match, depending on regex argument. If regex is + Calls operator.eq or re.search, depending on regex argument. If regex is True, perform an element-wise regex matching. 
Parameters @@ -1917,7 +1917,7 @@ def _compare_or_regex_match(a, b, regex=False): if not regex: op = lambda x: operator.eq(x, b) else: - op = np.vectorize(lambda x: bool(re.match(b, x)) if isinstance(x, str) + op = np.vectorize(lambda x: bool(re.search(b, x)) if isinstance(x, str) else False) is_a_array = isinstance(a, np.ndarray) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 219f7a1585fc2..127a64da38ba3 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -466,6 +466,13 @@ def test_regex_replace_dict_nested(self): assert_frame_equal(res3, expec) assert_frame_equal(res4, expec) + def test_regex_replace_dict_nested_non_first_character(self): + # GH 25259 + df = pd.DataFrame({'first': ['abc', 'bca', 'cab']}) + expected = pd.DataFrame({'first': ['.bc', 'bc.', 'c.b']}) + result = df.replace({'a': '.'}, regex=True) + assert_frame_equal(result, expected) + def test_regex_replace_dict_nested_gh4115(self): df = pd.DataFrame({'Type': ['Q', 'T', 'Q', 'Q', 'T'], 'tmp': 2}) expected = DataFrame({'Type': [0, 1, 0, 0, 1], 'tmp': 2}) From 89e8ae5c332bf420f25ccd3dfd3089f818d67f57 Mon Sep 17 00:00:00 2001 From: PetitLepton Date: Thu, 28 Feb 2019 11:16:18 +0800 Subject: [PATCH 2/2] Add space to match indentation --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f3df252561250..407db772d73e8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -553,7 +553,7 @@ def comp(s, regex=False): return isna(values) if hasattr(s, 'asm8'): return _compare_or_regex_search(maybe_convert_objects(values), - getattr(s, 'asm8'), regex) + getattr(s, 'asm8'), regex) return _compare_or_regex_search(values, s, regex) masks = [comp(s, regex) for i, s in enumerate(src_list)]