From a16fceb52e9a0775de49454ccf61311681cc5e84 Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Tue, 9 Jan 2024 11:51:33 +1100 Subject: [PATCH 1/3] fix: incorrect `Expr.replace` when old is single NULL --- crates/polars-core/src/series/mod.rs | 3 ++- crates/polars-ops/src/series/ops/replace.rs | 17 ++++++++++++++--- py-polars/tests/unit/operations/test_replace.py | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 1e1039af4a31..6cca321e0c21 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -421,7 +421,8 @@ impl Series { } /// Create a new ChunkedArray with values from self where the mask evaluates `true` and values - /// from `other` where the mask evaluates `false` + /// from `other` where the mask evaluates `false`. This function automatically broadcasts unit + /// length inputs. #[cfg(feature = "zip_with")] pub fn zip_with(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult { let (lhs, rhs) = coerce_lhs_rhs(self, other)?; diff --git a/crates/polars-ops/src/series/ops/replace.rs b/crates/polars-ops/src/series/ops/replace.rs index af5e4ebf2e50..9c06b54037e1 100644 --- a/crates/polars-ops/src/series/ops/replace.rs +++ b/crates/polars-ops/src/series/ops/replace.rs @@ -1,3 +1,5 @@ +use std::ops::BitOr; + use polars_core::prelude::*; use polars_core::utils::try_get_supertype; use polars_error::{polars_bail, polars_ensure, PolarsResult}; @@ -62,9 +64,18 @@ fn replace_by_single( new: &Series, default: &Series, ) -> PolarsResult { - let mask = is_in(s, old)?; - let new_broadcast = new.new_from_index(0, default.len()); - new_broadcast.zip_with(&mask, default) + let mask = if old.null_count() == old.len() { + s.is_null() + } else { + let mask = is_in(s, old)?; + + if old.null_count() == 0 { + mask + } else { + mask.bitor(s.is_null()) + } + }; + new.zip_with(&mask, default) } /// General case for replacing by multiple values diff --git a/py-polars/tests/unit/operations/test_replace.py b/py-polars/tests/unit/operations/test_replace.py index fa7375626cc3..c077e26338b1 100644 --- a/py-polars/tests/unit/operations/test_replace.py +++ b/py-polars/tests/unit/operations/test_replace.py @@ -447,6 +447,21 @@ def test_replace_fast_path_one_to_one() -> None: assert_frame_equal(result, expected) +def test_replace_fast_path_one_null_to_one() -> None: + # https://github.com/pola-rs/polars/issues/13391 + lf = pl.LazyFrame({"a": [1, None]}) + result = lf.select(pl.col("a").replace(None, 100)) + expected = pl.LazyFrame({"a": [1, 100]}) + assert_frame_equal(result, expected) + + +def test_replace_fast_path_many_with_null_to_one() -> None: + lf = pl.LazyFrame({"a": [1, 2, None]}) + result = lf.select(pl.col("a").replace([1, None], 100)) + expected = pl.LazyFrame({"a": [100, 2, 100]}) + assert_frame_equal(result, expected) + + def test_replace_fast_path_many_to_one() -> None: lf = pl.LazyFrame({"a": [1, 2, 2, 3]}) result = lf.select(pl.col("a").replace([2, 3], 100)) From 7ef2e8ff640a096174e310b94c5e396c1dd7e17b Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Tue, 9 Jan 2024 13:34:34 +1100 Subject: [PATCH 2/3] fix: broadcast in zip_with for Null dtype --- .../polars-core/src/series/implementations/null.rs | 14 ++++++++++++-- py-polars/tests/unit/functions/test_whenthen.py | 11 +++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/crates/polars-core/src/series/implementations/null.rs b/crates/polars-core/src/series/implementations/null.rs index 5676a607a898..b0e4a7d6e26a 100644 --- a/crates/polars-core/src/series/implementations/null.rs +++ b/crates/polars-core/src/series/implementations/null.rs @@ -64,8 +64,18 @@ impl PrivateSeries for NullChunked { } #[cfg(feature = "zip_with")] - fn zip_with_same_type(&self, _mask: &BooleanChunked, _other: &Series) -> PolarsResult { - Ok(self.clone().into_series()) + fn zip_with_same_type(&self, mask: &BooleanChunked, other: &Series) -> PolarsResult { + let len = match (self.len(), mask.len(), other.len()) { + (a, b, c) if a == b && b == c => a, + (1, a, b) | (a, 1, b) | (a, b, 1) if a == b => a, + (a, 1, 1) | (1, a, 1) | (1, 1, a) => a, + (_, 0, _) => 0, + _ => { + polars_bail!(ShapeMismatch: "shapes of `self`, `mask` and `other` are not suitable for `zip_with` operation") + }, + }; + + Ok(Self::new(self.name().into(), len).into_series()) } fn explode_by_offsets(&self, offsets: &[i64]) -> Series { ExplodeByOffsets::explode_by_offsets(self, offsets) diff --git a/py-polars/tests/unit/functions/test_whenthen.py b/py-polars/tests/unit/functions/test_whenthen.py index 2e85bd76108d..d5a25122c22a 100644 --- a/py-polars/tests/unit/functions/test_whenthen.py +++ b/py-polars/tests/unit/functions/test_whenthen.py @@ -514,3 +514,14 @@ def test_when_predicates_kwargs() -> None: ), pl.DataFrame({"misc": ["?", "z in (a|b), y<0", "?", "y=1"]}), ) + + +def test_when_then_null_broadcast() -> None: + assert ( + pl.select( + pl.when(pl.repeat(True, 2, dtype=pl.Boolean)).then( + pl.repeat(None, 1, dtype=pl.Null) + ) + ).height + == 2 + ) From 680ed02137b915c418aec8500e8561465e463f24 Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Tue, 9 Jan 2024 13:35:27 +1100 Subject: [PATCH 3/3] rename `test_whenthen.py` -> `test_when_then.py` --- .../tests/unit/functions/{test_whenthen.py => test_when_then.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename py-polars/tests/unit/functions/{test_whenthen.py => test_when_then.py} (100%) diff --git a/py-polars/tests/unit/functions/test_whenthen.py b/py-polars/tests/unit/functions/test_when_then.py similarity index 100% rename from py-polars/tests/unit/functions/test_whenthen.py rename to py-polars/tests/unit/functions/test_when_then.py