From 16280b6bfaff89cbb42bd72c16e02424ad5b77df Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Sat, 16 Jul 2022 15:36:58 -0500 Subject: [PATCH 1/5] fix 28277 --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/strings/accessor.py | 2 ++ pandas/tests/strings/test_cat.py | 8 ++++++++ 3 files changed, 11 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 22a5f2a08362f..7610261deaab5 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -866,6 +866,7 @@ Strings ^^^^^^^ - Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. Now raises ``TypeError`` (:issue:`3485`) - Bug in :meth:`Series.str.zfill` when strings contain leading signs, padding '0' before the sign character rather than after as ``str.zfill`` from standard library (:issue:`20868`) +- Bug in :meth:`Series.str.cat` when ``others`` is a ``Series.str`` object, only concating longest string (:issue:`28277`) - Interval diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 73d5c04ecd652..658d2d60a8675 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -398,6 +398,8 @@ def _get_series_list(self, others): elif isinstance(others, np.ndarray) and others.ndim == 2: others = DataFrame(others, index=idx) return [others[x] for x in others] + elif isinstance(others, type(self)): + return [others._data] elif is_list_like(others, allow_sets=False): others = list(others) # ensure iterators do not get read twice etc diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py index 8abbc59343e78..d5b78757ac979 100644 --- a/pandas/tests/strings/test_cat.py +++ b/pandas/tests/strings/test_cat.py @@ -376,3 +376,11 @@ def test_cat_different_classes(klass): result = s.str.cat(klass(["x", "y", "z"])) expected = Series(["ax", "by", "cz"]) tm.assert_series_equal(result, expected) + + +def test_cat_on_series_dot_str(): + ps = Series(["AbC", "de", "FGHI", "j", "kLLLm"]) + + res = ps.str.cat(others=ps.str) + expected = Series(["AbCAbC", "dede", "FGHIFGHI", "jj", "kLLLmkLLLm"]) + tm.assert_series_equal(res, expected) From 6fa08282d905d210d7b7ce3cd4c5823449463d66 Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Sat, 16 Jul 2022 15:54:04 -0500 Subject: [PATCH 2/5] fix typo --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7610261deaab5..1d24e516856df 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -866,7 +866,7 @@ Strings ^^^^^^^ - Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. Now raises ``TypeError`` (:issue:`3485`) - Bug in :meth:`Series.str.zfill` when strings contain leading signs, padding '0' before the sign character rather than after as ``str.zfill`` from standard library (:issue:`20868`) -- Bug in :meth:`Series.str.cat` when ``others`` is a ``Series.str`` object, only concating longest string (:issue:`28277`) +- Bug in :meth:`Series.str.cat` only concatenating the longest string when ``others`` is a ``Series.str`` object (:issue:`28277`) - Interval From 8033d67d037818b7806d2ec703fee1565310270a Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Mon, 18 Jul 2022 16:22:06 -0500 Subject: [PATCH 3/5] add test --- doc/source/whatsnew/v1.5.0.rst | 1 - pandas/core/strings/accessor.py | 2 -- pandas/tests/strings/test_cat.py | 19 +++++++++++++++---- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 1d24e516856df..22a5f2a08362f 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -866,7 +866,6 @@ Strings ^^^^^^^ - Bug in :meth:`str.startswith` and :meth:`str.endswith` when using other series as parameter _pat_. Now raises ``TypeError`` (:issue:`3485`) - Bug in :meth:`Series.str.zfill` when strings contain leading signs, padding '0' before the sign character rather than after as ``str.zfill`` from standard library (:issue:`20868`) -- Bug in :meth:`Series.str.cat` only concatenating the longest string when ``others`` is a ``Series.str`` object (:issue:`28277`) - Interval diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 658d2d60a8675..73d5c04ecd652 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -398,8 +398,6 @@ def _get_series_list(self, others): elif isinstance(others, np.ndarray) and others.ndim == 2: others = DataFrame(others, index=idx) return [others[x] for x in others] - elif isinstance(others, type(self)): - return [others._data] elif is_list_like(others, allow_sets=False): others = list(others) # ensure iterators do not get read twice etc diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py index d5b78757ac979..ff2f19f891f57 100644 --- a/pandas/tests/strings/test_cat.py +++ b/pandas/tests/strings/test_cat.py @@ -379,8 +379,19 @@ def test_cat_different_classes(klass): def test_cat_on_series_dot_str(): + # GH 28277 + # Test future warning of `Series.str.__iter__` ps = Series(["AbC", "de", "FGHI", "j", "kLLLm"]) - - res = ps.str.cat(others=ps.str) - expected = Series(["AbCAbC", "dede", "FGHIFGHI", "jj", "kLLLmkLLLm"]) - tm.assert_series_equal(res, expected) + with tm.assert_produces_warning(FutureWarning): + ps.str.cat(others=ps.str) + # The following code will be uncommented if `Series.str.__iter__` is removed. + """ + message = re.escape( + "others must be Series, Index, DataFrame, np.ndarray " + "or list-like (either containing only strings or " + "containing only objects of type Series/Index/" + "np.ndarray[1-dim])" + ) + with pytest.raises(TypeError, match=message): + ps.str.cat(others=ps.str) + """ From 775bc5ad9373c22489683f487a26b61cb9ebbe23 Mon Sep 17 00:00:00 2001 From: Xingrong Chen <56777910+xr-chen@users.noreply.github.com> Date: Mon, 18 Jul 2022 19:43:24 -0500 Subject: [PATCH 4/5] Update pandas/tests/strings/test_cat.py Co-authored-by: Matthew Roeschke --- pandas/tests/strings/test_cat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py index ff2f19f891f57..fa88b2aadf6ca 100644 --- a/pandas/tests/strings/test_cat.py +++ b/pandas/tests/strings/test_cat.py @@ -384,7 +384,7 @@ def test_cat_on_series_dot_str(): ps = Series(["AbC", "de", "FGHI", "j", "kLLLm"]) with tm.assert_produces_warning(FutureWarning): ps.str.cat(others=ps.str) - # The following code will be uncommented if `Series.str.__iter__` is removed. + # TODO(2.0): The following code can be uncommented when `Series.str.__iter__` is removed. """ message = re.escape( "others must be Series, Index, DataFrame, np.ndarray " From 49c0ef9269624079555d255aed9527ce3f568e9e Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Mon, 18 Jul 2022 19:53:26 -0500 Subject: [PATCH 5/5] fix pep 8 issue, change comment symbol --- pandas/tests/strings/test_cat.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py index fa88b2aadf6ca..4decdff8063a8 100644 --- a/pandas/tests/strings/test_cat.py +++ b/pandas/tests/strings/test_cat.py @@ -384,14 +384,14 @@ def test_cat_on_series_dot_str(): ps = Series(["AbC", "de", "FGHI", "j", "kLLLm"]) with tm.assert_produces_warning(FutureWarning): ps.str.cat(others=ps.str) - # TODO(2.0): The following code can be uncommented when `Series.str.__iter__` is removed. - """ - message = re.escape( - "others must be Series, Index, DataFrame, np.ndarray " - "or list-like (either containing only strings or " - "containing only objects of type Series/Index/" - "np.ndarray[1-dim])" - ) - with pytest.raises(TypeError, match=message): - ps.str.cat(others=ps.str) - """ + # TODO(2.0): The following code can be uncommented + # when `Series.str.__iter__` is removed. + + # message = re.escape( + # "others must be Series, Index, DataFrame, np.ndarray " + # "or list-like (either containing only strings or " + # "containing only objects of type Series/Index/" + # "np.ndarray[1-dim])" + # ) + # with pytest.raises(TypeError, match=message): + # ps.str.cat(others=ps.str)