From 73476bd093b9c80e65b296a8691377b20e6a39e8 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 13 Jan 2024 00:43:41 +0100 Subject: [PATCH 1/3] depr(python): Deprecate default `delimiter` value for `str.concat` --- py-polars/polars/expr/string.py | 20 ++++++++++++++------ py-polars/polars/series/string.py | 11 ++++++----- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index 386fff4ae88a..2fb77a061758 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -12,6 +12,7 @@ from polars.utils.deprecation import ( deprecate_renamed_function, deprecate_renamed_parameter, + issue_deprecation_warning, rename_use_earliest_to_ambiguous, ) from polars.utils.various import find_stacklevel @@ -448,9 +449,11 @@ def len_chars(self) -> Expr: """ return wrap_expr(self._pyexpr.str_len_chars()) - def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Expr: + def concat( + self, delimiter: str | None = None, *, ignore_nulls: bool = True + ) -> Expr: """ - Vertically concat the values in the Series to a single string value. + Vertically concatenate the string values in the column to a single string value. Parameters ---------- @@ -458,9 +461,8 @@ def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Expr: The delimiter to insert between consecutive string values. ignore_nulls Ignore null values (default). - - If set to ``False``, null values will be propagated. - if the column contains any null values, the output is ``None``. + If set to `False`, null values will be propagated. This means that + if the column contains any null values, the output is null. Returns ------- @@ -479,7 +481,6 @@ def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Expr: ╞═════╡ │ 1-2 │ └─────┘ - >>> df = pl.DataFrame({"foo": [1, None, 2]}) >>> df.select(pl.col("foo").str.concat("-", ignore_nulls=False)) shape: (1, 1) ┌──────┐ @@ -490,6 +491,13 @@ def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Expr: │ null │ └──────┘ """ + if delimiter is None: + issue_deprecation_warning( + "The default `delimiter` for `str.concat` will change from '-' to an empty string." + " Pass a delimiter to silence this warning.", + version="0.20.5", + ) + delimiter = "-" return wrap_expr(self._pyexpr.str_concat(delimiter, ignore_nulls)) def to_uppercase(self) -> Expr: diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 7ab2c1a9ce19..3f801c35ec33 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -385,9 +385,11 @@ def len_chars(self) -> Series: ] """ - def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Series: + def concat( + self, delimiter: str | None = None, *, ignore_nulls: bool = True + ) -> Series: """ - Vertically concat the values in the Series to a single string value. + Vertically concatenate the string values in the column to a single string value. Parameters ---------- @@ -395,9 +397,8 @@ def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Series: The delimiter to insert between consecutive string values. ignore_nulls Ignore null values (default). - - If set to ``False``, null values will be propagated. - if the column contains any null values, the output is ``None``. + If set to `False`, null values will be propagated. This means that + if the column contains any null values, the output is null. Returns ------- From 06acd68b704c3568eeeedfb9c28f1952270a9573 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 13 Jan 2024 00:51:52 +0100 Subject: [PATCH 2/3] Update tests --- .../unit/namespaces/string/test_concat.py | 68 +++++++++++++++++ .../unit/namespaces/string/test_string.py | 75 ------------------- 2 files changed, 68 insertions(+), 75 deletions(-) create mode 100644 py-polars/tests/unit/namespaces/string/test_concat.py diff --git a/py-polars/tests/unit/namespaces/string/test_concat.py b/py-polars/tests/unit/namespaces/string/test_concat.py new file mode 100644 index 000000000000..9e71bec8273d --- /dev/null +++ b/py-polars/tests/unit/namespaces/string/test_concat.py @@ -0,0 +1,68 @@ +from datetime import datetime + +import polars as pl +from polars.testing import assert_series_equal + + +def test_str_concat() -> None: + s = pl.Series(["1", None, "2", None]) + # propagate null + assert_series_equal( + s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String) + ) + # ignore null + assert_series_equal(s.str.concat("-"), pl.Series(["1-2"])) + + # str None/null is ok + s = pl.Series(["1", "None", "2", "null"]) + assert_series_equal( + s.str.concat("-", ignore_nulls=False), pl.Series(["1-None-2-null"]) + ) + assert_series_equal(s.str.concat("-"), pl.Series(["1-None-2-null"])) + + +def test_str_concat2() -> None: + df = pl.DataFrame({"foo": [1, None, 2, None]}) + + out = df.select(pl.col("foo").str.concat("-", ignore_nulls=False)) + assert out.item() is None + + out = df.select(pl.col("foo").str.concat("-")) + assert out.item() == "1-2" + + +def test_str_concat_all_null() -> None: + s = pl.Series([None, None, None], dtype=pl.String) + assert_series_equal( + s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String) + ) + assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""])) + + +def test_str_concat_empty_list() -> None: + s = pl.Series([], dtype=pl.String) + assert_series_equal(s.str.concat("-", ignore_nulls=False), pl.Series([""])) + assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""])) + + +def test_str_concat_empty_list2() -> None: + s = pl.Series([], dtype=pl.String) + df = pl.DataFrame({"foo": s}) + result = df.select(pl.col("foo").str.concat("-")).item() + expected = "" + assert result == expected + + +def test_str_concat_empty_list_agg_context() -> None: + df = pl.DataFrame(data={"i": [1], "v": [None]}, schema_overrides={"v": pl.String}) + result = df.group_by("i").agg(pl.col("v").drop_nulls().str.concat("-"))["v"].item() + expected = "" + assert result == expected + + +def test_str_concat_datetime() -> None: + df = pl.DataFrame({"d": [datetime(2020, 1, 1), None, datetime(2022, 1, 1)]}) + out = df.select(pl.col("d").str.concat("|", ignore_nulls=True)) + assert out.item() == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000" + out = df.select(pl.col("d").str.concat("|", ignore_nulls=False)) + assert out.item() is None diff --git a/py-polars/tests/unit/namespaces/string/test_string.py b/py-polars/tests/unit/namespaces/string/test_string.py index 2291d342e65a..1d34025f539a 100644 --- a/py-polars/tests/unit/namespaces/string/test_string.py +++ b/py-polars/tests/unit/namespaces/string/test_string.py @@ -1,8 +1,5 @@ from __future__ import annotations -from datetime import datetime -from typing import cast - import pytest import polars as pl @@ -49,78 +46,6 @@ def test_str_slice_expr() -> None: df.select(pl.col("a").str.slice(0, -1)) -def test_str_concat() -> None: - s = pl.Series(["1", None, "2", None]) - # propagate null - assert_series_equal( - s.str.concat(ignore_nulls=False), pl.Series([None], dtype=pl.String) - ) - # ignore null - assert_series_equal(s.str.concat(), pl.Series(["1-2"])) - - # str None/null is ok - s = pl.Series(["1", "None", "2", "null"]) - assert_series_equal(s.str.concat(ignore_nulls=False), pl.Series(["1-None-2-null"])) - assert_series_equal(s.str.concat(), pl.Series(["1-None-2-null"])) - - -def test_str_concat2() -> None: - df = pl.DataFrame({"foo": [1, None, 2, None]}) - - out = df.select(pl.col("foo").str.concat("-", ignore_nulls=False)) - assert cast(str, out.item()) is None - - out = df.select(pl.col("foo").str.concat("-")) - assert cast(str, out.item()) == "1-2" - - -def test_str_concat_all_null() -> None: - s = pl.Series([None, None, None], dtype=pl.String) - assert_series_equal( - s.str.concat(ignore_nulls=False), pl.Series([None], dtype=pl.String) - ) - assert_series_equal(s.str.concat(ignore_nulls=True), pl.Series([""])) - - -def test_str_concat_single_null() -> None: - s = pl.Series([None], dtype=pl.String) - assert_series_equal( - s.str.concat(ignore_nulls=False), pl.Series([None], dtype=pl.String) - ) - assert_series_equal(s.str.concat(ignore_nulls=True), pl.Series([""])) - - -def test_str_concat_empty_list() -> None: - s = pl.Series([], dtype=pl.String) - assert_series_equal(s.str.concat(ignore_nulls=False), pl.Series([""])) - assert_series_equal(s.str.concat(ignore_nulls=True), pl.Series([""])) - - -def test_str_concat_empty_list2() -> None: - s = pl.Series([], dtype=pl.String) - df = pl.DataFrame({"foo": s}) - result = df.select(pl.col("foo").str.concat()).item() - expected = "" - assert result == expected - - -def test_str_concat_empty_list_agg_context() -> None: - df = pl.DataFrame(data={"i": [1], "v": [None]}, schema_overrides={"v": pl.String}) - result = df.group_by("i").agg(pl.col("v").drop_nulls().str.concat())["v"].item() - expected = "" - assert result == expected - - -def test_str_concat_datetime() -> None: - df = pl.DataFrame({"d": [datetime(2020, 1, 1), None, datetime(2022, 1, 1)]}) - out = df.select(pl.col("d").str.concat("|", ignore_nulls=True)) - assert ( - cast(str, out.item()) == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000" - ) - out = df.select(pl.col("d").str.concat("|", ignore_nulls=False)) - assert cast(str, out.item()) is None - - def test_str_len_bytes() -> None: s = pl.Series(["Café", None, "345", "東京"]) result = s.str.len_bytes() From 93a874c4ca41d043e133652342469463017a554e Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 13 Jan 2024 00:54:34 +0100 Subject: [PATCH 3/3] Add deprecation test --- py-polars/tests/unit/namespaces/string/test_concat.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/py-polars/tests/unit/namespaces/string/test_concat.py b/py-polars/tests/unit/namespaces/string/test_concat.py index 9e71bec8273d..78fdc038da3e 100644 --- a/py-polars/tests/unit/namespaces/string/test_concat.py +++ b/py-polars/tests/unit/namespaces/string/test_concat.py @@ -1,5 +1,7 @@ from datetime import datetime +import pytest + import polars as pl from polars.testing import assert_series_equal @@ -66,3 +68,11 @@ def test_str_concat_datetime() -> None: assert out.item() == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000" out = df.select(pl.col("d").str.concat("|", ignore_nulls=False)) assert out.item() is None + + +def test_str_concat_delimiter_deprecated() -> None: + s = pl.Series(["1", None, "2", None]) + with pytest.deprecated_call(): + result = s.str.concat() + expected = pl.Series(["1-2"]) + assert_series_equal(result, expected)