From 299184931f46de5c8ee2af4d88f0f4b357474b56 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 5 Oct 2022 16:49:46 +0200 Subject: [PATCH] fix(rust, python): unique include null (#5112) --- .../src/chunked_array/ops/unique/mod.rs | 34 +++++++++++++++++-- py-polars/tests/unit/test_lazy.py | 3 ++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/polars/polars-core/src/chunked_array/ops/unique/mod.rs b/polars/polars-core/src/chunked_array/ops/unique/mod.rs index bc5f94871838..bf532be9f2a2 100644 --- a/polars/polars-core/src/chunked_array/ops/unique/mod.rs +++ b/polars/polars-core/src/chunked_array/ops/unique/mod.rs @@ -185,8 +185,38 @@ where } match self.is_sorted2() { IsSorted::Ascending | IsSorted::Descending => { - let mask = self.not_equal(&self.shift(1)); - self.filter(&mask) + // TODO! optimize this branch + if self.null_count() > 0 { + let mut arr = MutablePrimitiveArray::with_capacity(self.len()); + let mut iter = self.into_iter(); + let mut last = None; + + if let Some(val) = iter.next() { + last = val; + arr.push(val) + }; + + #[allow(clippy::unnecessary_filter_map)] + let to_extend = iter.filter_map(|opt_val| { + if opt_val != last { + last = opt_val; + Some(opt_val) + } else { + None + } + }); + + arr.extend(to_extend); + let arr: PrimitiveArray = arr.into(); + + Ok(ChunkedArray::from_chunks( + self.name(), + vec![Box::new(arr) as ArrayRef], + )) + } else { + let mask = self.not_equal(&self.shift(1)); + self.filter(&mask) + } } IsSorted::Not => { let sorted = self.sort(false); diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py index 5c113ae9fac9..5ec03ce0c6b5 100644 --- a/py-polars/tests/unit/test_lazy.py +++ b/py-polars/tests/unit/test_lazy.py @@ -1221,6 +1221,9 @@ def test_unique() -> None: .collect() .frame_equal(expected) ) + s0 = pl.Series("a", [1, 2, None, 2]) + # test if the null is included + assert s0.unique().to_list() == [None, 1, 2] def test_lazy_concat(df: pl.DataFrame) -> None: