Skip to content

Commit

Permalink
merge master and fix conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesdong1991 committed Jun 12, 2019
1 parent 129aaea commit 0f172d8
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 27 deletions.
6 changes: 5 additions & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ Other Deprecations
Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`).
- The :meth:`Series.ftype`, :meth:`Series.ftypes` and :meth:`DataFrame.ftypes` methods are deprecated and will be removed in a future version.
Instead, use :meth:`Series.dtype` and :meth:`DataFrame.dtypes` (:issue:`26705`).

- :func:`Series.str.replace`, when ``pat`` is single special regex character (such as ``.|\`` etc) and regex is not defined, regex is by default ``False`` for now, but this might be deprecated in the future. (:issue:`24804`)

.. _whatsnew_0250.prior_deprecations:

Expand Down Expand Up @@ -605,10 +605,14 @@ Conversion

Strings
^^^^^^^
- Bug in :func:`Series.str.replace` not applying regex in patterns of length 1 (:issue:`24804`)

<<<<<<< HEAD
=======
- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`)
-
-
>>>>>>> github/master


Interval
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,8 @@ def melt_stub(df, stub, i, j, value_vars, sep):
newdf = melt(df, id_vars=i, value_vars=value_vars,
value_name=stub.rstrip(sep), var_name=j)
newdf[j] = Categorical(newdf[j])
newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "")
newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "",
regex=True)

# GH17627 Cast numerics suffixes to int/float
newdf[j] = to_numeric(newdf[j], errors='ignore')
Expand Down
16 changes: 13 additions & 3 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def str_endswith(arr, pat, na=np.nan):
return _na_map(f, arr, na, dtype=bool)


def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=None):
r"""
Replace occurrences of pattern/regex in the Series/Index with
some other string. Equivalent to :meth:`str.replace` or
Expand Down Expand Up @@ -452,9 +452,13 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
flags : int, default 0 (no flags)
- re module flags, e.g. re.IGNORECASE
- Cannot be set if `pat` is a compiled regex
regex : bool, default True
regex : boolean, default None
- If True, assumes the passed-in pattern is a regular expression.
- If False, treats the pattern as a literal string
- If `pat` is a single character and `regex` is not specified, `pat`
is interpreted as a string literal. If `pat` is also a regular
expression symbol, a warning is issued that in the future `pat`
will be interpreted as a regex, rather than a literal.
- Cannot be set to False if `pat` is a compiled regex or `repl` is
a callable.
Expand Down Expand Up @@ -561,7 +565,7 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
# add case flag, if provided
if case is False:
flags |= re.IGNORECASE
if is_compiled_re or len(pat) > 1 or flags or callable(repl):
if is_compiled_re or pat or flags or callable(repl):
n = n if n >= 0 else 0
compiled = re.compile(pat, flags=flags)
f = lambda x: compiled.sub(repl=repl, string=x, count=n)
Expand All @@ -574,6 +578,12 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
if callable(repl):
raise ValueError("Cannot use a callable replacement when "
"regex=False")
# if regex is default None, and a single special character is given
# in pat, still take it as a literal, and raise the Future warning
if regex is None and len(pat) == 1 and pat in list(r"[\^$.|?*+()]"):
warnings.warn("'{}' is interpreted as a literal in ".format(pat) +
"default, not regex. It will change in the future.",
FutureWarning)
f = lambda x: x.replace(pat, repl, n)

return _na_map(f, arr)
Expand Down
105 changes: 83 additions & 22 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from numpy.random import randint
import pytest

import pandas.compat as compat

from pandas import DataFrame, Index, MultiIndex, Series, concat, isna, notna
import pandas.core.strings as strings
import pandas.util.testing as tm
Expand Down Expand Up @@ -892,27 +894,39 @@ def test_casemethods(self):
def test_replace(self):
values = Series(['fooBAD__barBAD', NA])

result = values.str.replace('BAD[_]*', '')
result = values.str.replace('BAD[_]*', '', regex=True)
exp = Series(['foobar', NA])
tm.assert_series_equal(result, exp)

result = values.str.replace('BAD[_]*', '', n=1)
result = values.str.replace('BAD[_]*', '', regex=True, n=1)
exp = Series(['foobarBAD', NA])
tm.assert_series_equal(result, exp)

# mixed
mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD',
None, 1, 2.])

rs = Series(mixed).str.replace('BAD[_]*', '')
rs = Series(mixed).str.replace('BAD[_]*', '', regex=True)
xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA])
assert isinstance(rs, Series)
tm.assert_almost_equal(rs, xp)

# unicode
values = Series([u'fooBAD__barBAD', NA])

result = values.str.replace('BAD[_]*', '', regex=True)
exp = Series([u'foobar', NA])
tm.assert_series_equal(result, exp)

result = values.str.replace('BAD[_]*', '', n=1, regex=True)
exp = Series([u'foobarBAD', NA])
tm.assert_series_equal(result, exp)

# flags + unicode
values = Series([b"abcd,\xc3\xa0".decode("utf-8")])
exp = Series([b"abcd, \xc3\xa0".decode("utf-8")])
result = values.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE)
result = values.str.replace(r"(?<=\w),(?=\w)", ", ", regex=True,
flags=re.UNICODE)
tm.assert_series_equal(result, exp)

# GH 13438
Expand All @@ -930,7 +944,7 @@ def test_replace_callable(self):

# test with callable
repl = lambda m: m.group(0).swapcase()
result = values.str.replace('[a-z][A-Z]{2}', repl, n=2)
result = values.str.replace('[a-z][A-Z]{2}', repl, n=2, regex=True)
exp = Series(['foObaD__baRbaD', NA])
tm.assert_series_equal(result, exp)

Expand All @@ -940,21 +954,21 @@ def test_replace_callable(self):

repl = lambda: None
with pytest.raises(TypeError, match=p_err):
values.str.replace('a', repl)
values.str.replace('a', repl, regex=True)

repl = lambda m, x: None
with pytest.raises(TypeError, match=p_err):
values.str.replace('a', repl)
values.str.replace('a', repl, regex=True)

repl = lambda m, x, y=None: None
with pytest.raises(TypeError, match=p_err):
values.str.replace('a', repl)
values.str.replace('a', repl, regex=True)

# test regex named groups
values = Series(['Foo Bar Baz', NA])
pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
repl = lambda m: m.group('middle').swapcase()
result = values.str.replace(pat, repl)
result = values.str.replace(pat, repl, regex=True)
exp = Series(['bAR', NA])
tm.assert_series_equal(result, exp)

Expand All @@ -964,28 +978,39 @@ def test_replace_compiled_regex(self):

# test with compiled regex
pat = re.compile(r'BAD[_]*')
result = values.str.replace(pat, '')
result = values.str.replace(pat, '', regex=True)
exp = Series(['foobar', NA])
tm.assert_series_equal(result, exp)

result = values.str.replace(pat, '', n=1)
result = values.str.replace(pat, '', n=1, regex=True)
exp = Series(['foobarBAD', NA])
tm.assert_series_equal(result, exp)

# mixed
mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD',
None, 1, 2.])

rs = Series(mixed).str.replace(pat, '')
rs = Series(mixed).str.replace(pat, '', regex=True)
xp = Series(['a', NA, 'b', NA, NA, 'foo', NA, NA, NA])
assert isinstance(rs, Series)
tm.assert_almost_equal(rs, xp)

# unicode
values = Series([u'fooBAD__barBAD', NA])

result = values.str.replace(pat, '', regex=True)
exp = Series([u'foobar', NA])
tm.assert_series_equal(result, exp)

result = values.str.replace(pat, '', n=1, regex=True)
exp = Series([u'foobarBAD', NA])
tm.assert_series_equal(result, exp)

# flags + unicode
values = Series([b"abcd,\xc3\xa0".decode("utf-8")])
exp = Series([b"abcd, \xc3\xa0".decode("utf-8")])
pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
result = values.str.replace(pat, ", ")
result = values.str.replace(pat, ", ", regex=True)
tm.assert_series_equal(result, exp)

# case and flags provided to str.replace will have no effect
Expand All @@ -995,29 +1020,30 @@ def test_replace_compiled_regex(self):

with pytest.raises(ValueError,
match="case and flags cannot be"):
result = values.str.replace(pat, '', flags=re.IGNORECASE)
result = values.str.replace(pat, '', flags=re.IGNORECASE,
regex=True)

with pytest.raises(ValueError,
match="case and flags cannot be"):
result = values.str.replace(pat, '', case=False)
result = values.str.replace(pat, '', case=False, regex=True)

with pytest.raises(ValueError,
match="case and flags cannot be"):
result = values.str.replace(pat, '', case=True)
result = values.str.replace(pat, '', case=True, regex=True)

# test with callable
values = Series(['fooBAD__barBAD', NA])
repl = lambda m: m.group(0).swapcase()
pat = re.compile('[a-z][A-Z]{2}')
result = values.str.replace(pat, repl, n=2)
result = values.str.replace(pat, repl, n=2, regex=True)
exp = Series(['foObaD__baRbaD', NA])
tm.assert_series_equal(result, exp)

def test_replace_literal(self):
# GH16808 literal replace (regex=False vs regex=True)
values = Series(['f.o', 'foo', NA])
exp = Series(['bao', 'bao', NA])
result = values.str.replace('f.', 'ba')
result = values.str.replace('f.', 'ba', regex=True)
tm.assert_series_equal(result, exp)

exp = Series(['bao', 'foo', NA])
Expand Down Expand Up @@ -2710,6 +2736,7 @@ def test_partition_deprecation(self):
result = values.str.rpartition(pat='_')
tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings("ignore: '|' is interpreted as a literal")
def test_pipe_failures(self):
# #2119
s = Series(['A|B|C'])
Expand All @@ -2719,7 +2746,7 @@ def test_pipe_failures(self):

tm.assert_series_equal(result, exp)

result = s.str.replace('|', ' ')
result = s.str.replace('|', ' ', regex=None)
exp = Series(['A B C'])

tm.assert_series_equal(result, exp)
Expand Down Expand Up @@ -2980,17 +3007,17 @@ def test_replace_moar(self):
s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, 'CABA',
'dog', 'cat'])

result = s.str.replace('A', 'YYY')
result = s.str.replace('A', 'YYY', regex=True)
expected = Series(['YYY', 'B', 'C', 'YYYaba', 'Baca', '', NA,
'CYYYBYYY', 'dog', 'cat'])
assert_series_equal(result, expected)

result = s.str.replace('A', 'YYY', case=False)
result = s.str.replace('A', 'YYY', case=False, regex=True)
expected = Series(['YYY', 'B', 'C', 'YYYYYYbYYY', 'BYYYcYYY', '', NA,
'CYYYBYYY', 'dog', 'cYYYt'])
assert_series_equal(result, expected)

result = s.str.replace('^.a|dog', 'XX-XX ', case=False)
result = s.str.replace('^.a|dog', 'XX-XX ', case=False, regex=True)
expected = Series(['A', 'B', 'C', 'XX-XX ba', 'XX-XX ca', '', NA,
'XX-XX BA', 'XX-XX ', 'XX-XX t'])
assert_series_equal(result, expected)
Expand Down Expand Up @@ -3162,6 +3189,40 @@ def test_method_on_bytes(self):
match="Cannot use .str.cat with values of.*"):
lhs.str.cat(rhs)

@pytest.mark.filterwarnings("ignore: '.' is interpreted as a literal")
@pytest.mark.parametrize("regex, expected_array", [
(True, ['foofoofoo', 'foofoofoo']),
(False, ['abc', '123']),
(None, ['abc', '123'])
])
def test_replace_single_pattern(self, regex, expected_array):
values = Series(['abc', '123'])
# GH: 24804
result = values.str.replace('.', 'foo', regex=regex)
expected = Series(expected_array)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("input_array, single_char, replace_char, "
"expect_array, warn",
[("a.c", ".", "b", "abc", True),
("a@c", "@", "at", "aatc", False)]
)
def test_replace_warning_single_character(self, input_array,
single_char, replace_char,
expect_array, warn):
# GH: 24804
values = Series([input_array])
if warn:
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = values.str.replace(single_char, replace_char)
else:
result = values.str.replace(single_char, replace_char)

expected = Series([expect_array])
tm.assert_series_equal(result, expected)

@pytest.mark.skipif(compat.PY2, reason='not in python2')
def test_casefold(self):
# GH25405
expected = Series(['ss', NA, 'case', 'ssd'])
Expand Down

0 comments on commit 0f172d8

Please sign in to comment.