Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

depr(python): Deprecate default delimiter value for str.concat #13690

Merged
merged 3 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from polars.utils.deprecation import (
deprecate_renamed_function,
deprecate_renamed_parameter,
issue_deprecation_warning,
rename_use_earliest_to_ambiguous,
)
from polars.utils.various import find_stacklevel
Expand Down Expand Up @@ -448,19 +449,20 @@ def len_chars(self) -> Expr:
"""
return wrap_expr(self._pyexpr.str_len_chars())

def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Expr:
def concat(
self, delimiter: str | None = None, *, ignore_nulls: bool = True
) -> Expr:
"""
Vertically concat the values in the Series to a single string value.
Vertically concatenate the string values in the column to a single string value.

Parameters
----------
delimiter
The delimiter to insert between consecutive string values.
ignore_nulls
Ignore null values (default).

If set to ``False``, null values will be propagated.
if the column contains any null values, the output is ``None``.
If set to `False`, null values will be propagated. This means that
if the column contains any null values, the output is null.

Returns
-------
Expand All @@ -479,7 +481,6 @@ def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Expr:
╞═════╡
│ 1-2 │
└─────┘
>>> df = pl.DataFrame({"foo": [1, None, 2]})
>>> df.select(pl.col("foo").str.concat("-", ignore_nulls=False))
shape: (1, 1)
┌──────┐
Expand All @@ -490,6 +491,13 @@ def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Expr:
│ null │
└──────┘
"""
if delimiter is None:
issue_deprecation_warning(
"The default `delimiter` for `str.concat` will change from '-' to an empty string."
" Pass a delimiter to silence this warning.",
version="0.20.5",
)
delimiter = "-"
return wrap_expr(self._pyexpr.str_concat(delimiter, ignore_nulls))

def to_uppercase(self) -> Expr:
Expand Down
11 changes: 6 additions & 5 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,19 +385,20 @@ def len_chars(self) -> Series:
]
"""

def concat(self, delimiter: str = "-", *, ignore_nulls: bool = True) -> Series:
def concat(
self, delimiter: str | None = None, *, ignore_nulls: bool = True
) -> Series:
"""
Vertically concat the values in the Series to a single string value.
Vertically concatenate the string values in the column to a single string value.

Parameters
----------
delimiter
The delimiter to insert between consecutive string values.
ignore_nulls
Ignore null values (default).

If set to ``False``, null values will be propagated.
if the column contains any null values, the output is ``None``.
If set to `False`, null values will be propagated. This means that
if the column contains any null values, the output is null.

Returns
-------
Expand Down
78 changes: 78 additions & 0 deletions py-polars/tests/unit/namespaces/string/test_concat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from datetime import datetime

import pytest

import polars as pl
from polars.testing import assert_series_equal


def test_str_concat() -> None:
    """Check `Series.str.concat` null handling when a delimiter is given."""
    with_nulls = pl.Series(["1", None, "2", None])

    # With `ignore_nulls=False` a single null makes the whole result null.
    propagated = with_nulls.str.concat("-", ignore_nulls=False)
    assert_series_equal(propagated, pl.Series([None], dtype=pl.String))

    # Without `ignore_nulls`, the nulls are skipped and the rest is joined.
    skipped = with_nulls.str.concat("-")
    assert_series_equal(skipped, pl.Series(["1-2"]))

    # The literal strings "None" and "null" are ordinary values, not nulls,
    # so both modes must produce the same joined string.
    lookalikes = pl.Series(["1", "None", "2", "null"])
    expected = pl.Series(["1-None-2-null"])
    assert_series_equal(lookalikes.str.concat("-", ignore_nulls=False), expected)
    assert_series_equal(lookalikes.str.concat("-"), expected)


def test_str_concat2() -> None:
    """Check `Expr.str.concat` null handling on a column built from integers."""
    frame = pl.DataFrame({"foo": [1, None, 2, None]})
    foo = pl.col("foo")

    # Null propagation: any null in the column yields a null result.
    propagated = frame.select(foo.str.concat("-", ignore_nulls=False)).item()
    assert propagated is None

    # Without `ignore_nulls`, only the non-null values are joined.
    joined = frame.select(foo.str.concat("-")).item()
    assert joined == "1-2"


def test_str_concat_all_null() -> None:
    """Check `str.concat` on a String Series that contains only nulls."""
    all_null = pl.Series([None, None, None], dtype=pl.String)

    # Propagating nulls gives a single null of dtype String.
    propagated = all_null.str.concat("-", ignore_nulls=False)
    assert_series_equal(propagated, pl.Series([None], dtype=pl.String))

    # Ignoring nulls leaves nothing to join, so the result is an empty string.
    skipped = all_null.str.concat("-", ignore_nulls=True)
    assert_series_equal(skipped, pl.Series([""]))


def test_str_concat_empty_list() -> None:
    """Check that concatenating an empty String Series yields an empty string."""
    empty = pl.Series([], dtype=pl.String)

    # The outcome must not depend on the null-handling mode.
    for ignore_nulls in (False, True):
        result = empty.str.concat("-", ignore_nulls=ignore_nulls)
        assert_series_equal(result, pl.Series([""]))


def test_str_concat_empty_list2() -> None:
    """Check that `Expr.str.concat` on an empty String column yields ""."""
    frame = pl.DataFrame({"foo": pl.Series([], dtype=pl.String)})
    concatenated = frame.select(pl.col("foo").str.concat("-"))
    assert concatenated.item() == ""


def test_str_concat_empty_list_agg_context() -> None:
    """Check `str.concat` inside `group_by().agg()` when the group ends up empty."""
    frame = pl.DataFrame(
        data={"i": [1], "v": [None]},
        schema_overrides={"v": pl.String},
    )

    # The only value in the group is null, so `drop_nulls` leaves nothing to join.
    non_null_concat = pl.col("v").drop_nulls().str.concat("-")
    aggregated = frame.group_by("i").agg(non_null_concat)

    assert aggregated["v"].item() == ""


def test_str_concat_datetime() -> None:
    """Check `Expr.str.concat` on a datetime column with one null entry."""
    frame = pl.DataFrame({"d": [datetime(2020, 1, 1), None, datetime(2022, 1, 1)]})
    d = pl.col("d")

    # Ignoring nulls joins the two datetimes in their string form.
    joined = frame.select(d.str.concat("|", ignore_nulls=True)).item()
    assert joined == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000"

    # Propagating nulls turns the whole result into null.
    propagated = frame.select(d.str.concat("|", ignore_nulls=False)).item()
    assert propagated is None


def test_str_concat_delimiter_deprecated() -> None:
    """Check that omitting `delimiter` warns and still joins with "-"."""
    values = pl.Series(["1", None, "2", None])

    # Calling without a delimiter is the deprecated path under test; do not
    # add one here.
    with pytest.deprecated_call():
        joined = values.str.concat()

    assert_series_equal(joined, pl.Series(["1-2"]))
75 changes: 0 additions & 75 deletions py-polars/tests/unit/namespaces/string/test_string.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
from __future__ import annotations

from datetime import datetime
from typing import cast

import pytest

import polars as pl
Expand Down Expand Up @@ -49,78 +46,6 @@ def test_str_slice_expr() -> None:
df.select(pl.col("a").str.slice(0, -1))


def test_str_concat() -> None:
    """Check `Series.str.concat` null handling with an explicit delimiter.

    The delimiter is passed explicitly on every call: omitting it goes through
    the deprecated default and emits a deprecation warning.
    """
    s = pl.Series(["1", None, "2", None])
    # propagate null
    assert_series_equal(
        s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String)
    )
    # ignore null
    assert_series_equal(s.str.concat("-"), pl.Series(["1-2"]))

    # str None/null is ok
    s = pl.Series(["1", "None", "2", "null"])
    assert_series_equal(
        s.str.concat("-", ignore_nulls=False), pl.Series(["1-None-2-null"])
    )
    assert_series_equal(s.str.concat("-"), pl.Series(["1-None-2-null"]))


def test_str_concat2() -> None:
    """Check `Expr.str.concat` null handling on a column built from integers."""
    df = pl.DataFrame({"foo": [1, None, 2, None]})

    # No `cast(str, ...)` around `item()`: the first result is asserted to be
    # None, so claiming it is a `str` would contradict the assertion.
    out = df.select(pl.col("foo").str.concat("-", ignore_nulls=False))
    assert out.item() is None

    out = df.select(pl.col("foo").str.concat("-"))
    assert out.item() == "1-2"


def test_str_concat_all_null() -> None:
    """Check `str.concat` on a String Series that contains only nulls.

    The delimiter is passed explicitly so the test does not depend on the
    deprecated default value.
    """
    s = pl.Series([None, None, None], dtype=pl.String)
    # Propagating nulls gives a single null of dtype String.
    assert_series_equal(
        s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String)
    )
    # Ignoring nulls leaves nothing to join, so the result is an empty string.
    assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""]))


def test_str_concat_single_null() -> None:
    """Check `str.concat` on a String Series holding exactly one null.

    The delimiter is passed explicitly so the test does not depend on the
    deprecated default value.
    """
    s = pl.Series([None], dtype=pl.String)
    # Propagating nulls gives a single null of dtype String.
    assert_series_equal(
        s.str.concat("-", ignore_nulls=False), pl.Series([None], dtype=pl.String)
    )
    # Ignoring the only value leaves nothing to join: empty string.
    assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""]))


def test_str_concat_empty_list() -> None:
    """Check that concatenating an empty String Series yields an empty string.

    The delimiter is passed explicitly so the test does not depend on the
    deprecated default value.
    """
    s = pl.Series([], dtype=pl.String)
    # The outcome must be the same in both null-handling modes.
    assert_series_equal(s.str.concat("-", ignore_nulls=False), pl.Series([""]))
    assert_series_equal(s.str.concat("-", ignore_nulls=True), pl.Series([""]))


def test_str_concat_empty_list2() -> None:
    """Check that `Expr.str.concat` on an empty String column yields "".

    The delimiter is passed explicitly so the test does not depend on the
    deprecated default value.
    """
    s = pl.Series([], dtype=pl.String)
    df = pl.DataFrame({"foo": s})
    result = df.select(pl.col("foo").str.concat("-")).item()
    expected = ""
    assert result == expected


def test_str_concat_empty_list_agg_context() -> None:
    """Check `str.concat` inside `group_by().agg()` when the group ends up empty.

    The delimiter is passed explicitly so the test does not depend on the
    deprecated default value.
    """
    df = pl.DataFrame(data={"i": [1], "v": [None]}, schema_overrides={"v": pl.String})
    # The only value in the group is null, so `drop_nulls` leaves nothing to join.
    result = df.group_by("i").agg(pl.col("v").drop_nulls().str.concat("-"))["v"].item()
    expected = ""
    assert result == expected


def test_str_concat_datetime() -> None:
    """Check `Expr.str.concat` on a datetime column with one null entry."""
    df = pl.DataFrame({"d": [datetime(2020, 1, 1), None, datetime(2022, 1, 1)]})

    # Ignoring nulls joins the two datetimes in their string form.
    out = df.select(pl.col("d").str.concat("|", ignore_nulls=True))
    assert out.item() == "2020-01-01 00:00:00.000000|2022-01-01 00:00:00.000000"

    # No `cast(str, ...)` here: the value is asserted to be None, so claiming
    # it is a `str` would contradict the assertion.
    out = df.select(pl.col("d").str.concat("|", ignore_nulls=False))
    assert out.item() is None


def test_str_len_bytes() -> None:
s = pl.Series(["Café", None, "345", "東京"])
result = s.str.len_bytes()
Expand Down