diff --git a/polars/polars-core/src/chunked_array/arithmetic.rs b/polars/polars-core/src/chunked_array/arithmetic.rs index 181d4bdd0942..17396d04f146 100644 --- a/polars/polars-core/src/chunked_array/arithmetic.rs +++ b/polars/polars-core/src/chunked_array/arithmetic.rs @@ -156,7 +156,7 @@ where .zip(rhs.downcast_iter()) .map(|(lhs, rhs)| Box::new(kernel(lhs, rhs)) as ArrayRef) .collect(); - lhs.copy_with_chunks(chunks, false) + lhs.copy_with_chunks(chunks, false, false) } // broadcast right path (_, 1) => { diff --git a/polars/polars-core/src/chunked_array/mod.rs b/polars/polars-core/src/chunked_array/mod.rs index 45fe40504f48..002763da9665 100644 --- a/polars/polars-core/src/chunked_array/mod.rs +++ b/polars/polars-core/src/chunked_array/mod.rs @@ -166,6 +166,10 @@ impl ChunkedArray { self.bit_settings.contains(Settings::SORTED_DSC) } + pub fn unset_fast_explode_list(&mut self) { + self.bit_settings.remove(Settings::FAST_EXPLODE_LIST) + } + pub fn is_sorted_flag2(&self) -> IsSorted { if self.is_sorted_flag() { IsSorted::Ascending @@ -319,7 +323,12 @@ impl ChunkedArray { } /// Create a new ChunkedArray from self, where the chunks are replaced. - fn copy_with_chunks(&self, chunks: Vec, keep_sorted: bool) -> Self { + fn copy_with_chunks( + &self, + chunks: Vec, + keep_sorted: bool, + keep_fast_explode: bool, + ) -> Self { let mut out = ChunkedArray { field: self.field.clone(), chunks, @@ -331,6 +340,9 @@ impl ChunkedArray { if !keep_sorted { out.set_sorted_flag(IsSorted::Not); } + if !keep_fast_explode { + out.unset_fast_explode_list() + } out } @@ -390,7 +402,7 @@ impl ChunkedArray { a.with_validity(validity) }) .collect(); - self.copy_with_chunks(chunks, true) + self.copy_with_chunks(chunks, true, false) } /// Get data type of ChunkedArray. diff --git a/polars/polars-core/src/chunked_array/ops/chunkops.rs b/polars/polars-core/src/chunked_array/ops/chunkops.rs index c47b00b281eb..7d862334b125 100644 --- a/polars/polars-core/src/chunked_array/ops/chunkops.rs +++ b/polars/polars-core/src/chunked_array/ops/chunkops.rs @@ -103,7 +103,7 @@ impl ChunkedArray { self.clone() } else { let chunks = inner_rechunk(&self.chunks); - self.copy_with_chunks(chunks, true) + self.copy_with_chunks(chunks, true, true) } } } @@ -117,7 +117,7 @@ impl ChunkedArray { #[inline] pub fn slice(&self, offset: i64, length: usize) -> Self { let (chunks, len) = slice(&self.chunks, offset, length, self.len()); - let mut out = self.copy_with_chunks(chunks, true); + let mut out = self.copy_with_chunks(chunks, true, true); out.length = len as IdxSize; out } diff --git a/polars/polars-core/src/chunked_array/ops/filter.rs b/polars/polars-core/src/chunked_array/ops/filter.rs index c39b6f9f0bfd..a83c430dd610 100644 --- a/polars/polars-core/src/chunked_array/ops/filter.rs +++ b/polars/polars-core/src/chunked_array/ops/filter.rs @@ -46,7 +46,7 @@ where .zip(filter.downcast_iter()) .map(|(left, mask)| filter_fn(left, mask).unwrap()) .collect::>(); - Ok(self.copy_with_chunks(chunks, true)) + Ok(self.copy_with_chunks(chunks, true, true)) } } @@ -67,7 +67,7 @@ impl ChunkFilter for BooleanChunked { .zip(filter.downcast_iter()) .map(|(left, mask)| filter_fn(left, mask).unwrap()) .collect::>(); - Ok(self.copy_with_chunks(chunks, true)) + Ok(self.copy_with_chunks(chunks, true, true)) } } @@ -89,7 +89,7 @@ impl ChunkFilter for Utf8Chunked { .map(|(left, mask)| filter_fn(left, mask).unwrap()) .collect::>(); - Ok(self.copy_with_chunks(chunks, true)) + Ok(self.copy_with_chunks(chunks, true, true)) } } @@ -112,7 +112,7 @@ impl ChunkFilter for BinaryChunked { .map(|(left, mask)| filter_fn(left, mask).unwrap()) .collect::>(); - Ok(self.copy_with_chunks(chunks, true)) + Ok(self.copy_with_chunks(chunks, true, true)) } } diff --git a/polars/polars-core/src/chunked_array/ops/take/mod.rs b/polars/polars-core/src/chunked_array/ops/take/mod.rs index 78584b835462..46684c6e154c 100644 --- a/polars/polars-core/src/chunked_array/ops/take/mod.rs +++ b/polars/polars-core/src/chunked_array/ops/take/mod.rs @@ -58,6 +58,16 @@ macro_rules! take_opt_iter_n_chunks_unchecked { }}; } +impl ChunkedArray +where + T: PolarsDataType, +{ + fn finish_from_array(&self, array: Box) -> Self { + let keep_fast_explode = array.null_count() == 0; + self.copy_with_chunks(vec![array], false, keep_fast_explode) + } +} + impl ChunkTake for ChunkedArray where T: PolarsNumericType, @@ -97,7 +107,7 @@ where } } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } TakeIdx::Iter(iter) => { if self.is_empty() { @@ -118,7 +128,7 @@ where return ca; } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } TakeIdx::IterNulls(iter) => { if self.is_empty() { @@ -139,7 +149,7 @@ where return ca; } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } } } @@ -188,7 +198,7 @@ impl ChunkTake for BooleanChunked { } } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } TakeIdx::Iter(iter) => { if self.is_empty() { @@ -205,7 +215,7 @@ impl ChunkTake for BooleanChunked { return ca; } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } TakeIdx::IterNulls(iter) => { if self.is_empty() { @@ -225,7 +235,7 @@ impl ChunkTake for BooleanChunked { return ca; } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } } } @@ -274,7 +284,7 @@ impl ChunkTake for Utf8Chunked { } } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } TakeIdx::Iter(iter) => { let array = match (self.has_validity(), self.chunks.len()) { @@ -288,7 +298,7 @@ impl ChunkTake for Utf8Chunked { return ca; } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } TakeIdx::IterNulls(iter) => { let array = match (self.has_validity(), self.chunks.len()) { @@ -305,7 +315,7 @@ impl ChunkTake for Utf8Chunked { return ca; } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } } } @@ -356,7 +366,7 @@ impl ChunkTake for BinaryChunked { } } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } TakeIdx::Iter(iter) => { let array = match (self.has_validity(), self.chunks.len()) { @@ -370,7 +380,7 @@ impl ChunkTake for BinaryChunked { return ca; } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } TakeIdx::IterNulls(iter) => { let array = match (self.has_validity(), self.chunks.len()) { @@ -387,7 +397,7 @@ impl ChunkTake for BinaryChunked { return ca; } }; - self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } } } @@ -447,7 +457,7 @@ impl ChunkTake for ListChunked { } } }; - ca_self.copy_with_chunks(vec![array], false) + self.finish_from_array(array) } // todo! fast path for single chunk TakeIdx::Iter(iter) => { diff --git a/py-polars/tests/unit/test_lists.py b/py-polars/tests/unit/test_lists.py index 3734ad04d33a..1c43345ff0c3 100644 --- a/py-polars/tests/unit/test_lists.py +++ b/py-polars/tests/unit/test_lists.py @@ -572,6 +572,16 @@ def test_fast_explode_flag() -> None: df1 = pl.DataFrame({"values": [[[1, 2]]]}) assert df1.clone().vstack(df1)["values"].flags["FAST_EXPLODE"] + # test take that produces a null in list + df = pl.DataFrame({"a": [1, 2, 1, 3]}) + df_b = pl.DataFrame({"a": [1, 2], "c": [["1", "2", "c"], ["1", "2", "c"]]}) + assert df_b["c"].flags["FAST_EXPLODE"] + + # join produces a null + assert not (df.join(df_b, on=["a"], how="left").select(["c"]))["c"].flags[ + "FAST_EXPLODE" + ] + def test_list_amortized_apply_explode_5812() -> None: s = pl.Series([None, [1, 3], [0, -3], [1, 2, 2]])