From 625056bd73b916705525e2fb23ab2a36ccc48c9c Mon Sep 17 00:00:00 2001 From: Edward Davis <91922857+edavisau@users.noreply.github.com> Date: Sat, 20 Jan 2024 20:26:00 +1100 Subject: [PATCH] feat(rust, python): Support zero fill null strategy for binary and string columns (#13869) --- crates/polars-core/src/chunked_array/ops/fill_null.rs | 1 + .../tests/unit/functions/aggregation/test_horizontal.py | 8 ++++++++ py-polars/tests/unit/series/test_series.py | 6 ++++++ 3 files changed, 15 insertions(+) diff --git a/crates/polars-core/src/chunked_array/ops/fill_null.rs b/crates/polars-core/src/chunked_array/ops/fill_null.rs index efab235944e0..9458021cf92d 100644 --- a/crates/polars-core/src/chunked_array/ops/fill_null.rs +++ b/crates/polars-core/src/chunked_array/ops/fill_null.rs @@ -383,6 +383,7 @@ fn fill_null_binary(ca: &BinaryChunked, strategy: FillNullStrategy) -> PolarsRes FillNullStrategy::Max => { ca.fill_null_with_values(ca.max_binary().ok_or_else(err_fill_null)?) }, + FillNullStrategy::Zero => ca.fill_null_with_values(&[]), strat => polars_bail!(InvalidOperation: "fill-null strategy {:?} is not supported", strat), } } diff --git a/py-polars/tests/unit/functions/aggregation/test_horizontal.py b/py-polars/tests/unit/functions/aggregation/test_horizontal.py index 17db61cd9c85..cc88e506919e 100644 --- a/py-polars/tests/unit/functions/aggregation/test_horizontal.py +++ b/py-polars/tests/unit/functions/aggregation/test_horizontal.py @@ -240,6 +240,14 @@ def test_sum_max_min() -> None: assert_series_equal(out["min"], pl.Series("min", [1.0, 2.0, 3.0])) +def test_str_sum_horizontal() -> None: + df = pl.DataFrame( + {"A": ["a", "b", None, "c", None], "B": ["f", "g", "h", None, None]} + ) + out = df.select(pl.sum_horizontal("A", "B")) + assert_series_equal(out["A"], pl.Series("A", ["af", "bg", "h", "c", ""])) + + def test_cum_sum_horizontal() -> None: df = pl.DataFrame( { diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index 762ab26b5c27..eb3d5c95ed83 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -1025,10 +1025,16 @@ def test_fill_null() -> None: b = pl.Series("b", ["a", None, "c", None, "e"]) assert b.fill_null(strategy="min").to_list() == ["a", "a", "c", "a", "e"] assert b.fill_null(strategy="max").to_list() == ["a", "e", "c", "e", "e"] + assert b.fill_null(strategy="zero").to_list() == ["a", "", "c", "", "e"] + assert b.fill_null(strategy="forward").to_list() == ["a", "a", "c", "c", "e"] + assert b.fill_null(strategy="backward").to_list() == ["a", "c", "c", "e", "e"] c = pl.Series("c", [b"a", None, b"c", None, b"e"]) assert c.fill_null(strategy="min").to_list() == [b"a", b"a", b"c", b"a", b"e"] assert c.fill_null(strategy="max").to_list() == [b"a", b"e", b"c", b"e", b"e"] + assert c.fill_null(strategy="zero").to_list() == [b"a", b"", b"c", b"", b"e"] + assert c.fill_null(strategy="forward").to_list() == [b"a", b"a", b"c", b"c", b"e"] + assert c.fill_null(strategy="backward").to_list() == [b"a", b"c", b"c", b"e", b"e"] df = pl.DataFrame( [