Skip to content

Commit

Permalink
fix(rust, python): don't fast explode on null introducing take (#6890)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 15, 2023
1 parent d311ac2 commit 0263cbe
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 22 deletions.
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ where
.zip(rhs.downcast_iter())
.map(|(lhs, rhs)| Box::new(kernel(lhs, rhs)) as ArrayRef)
.collect();
lhs.copy_with_chunks(chunks, false)
lhs.copy_with_chunks(chunks, false, false)
}
// broadcast right path
(_, 1) => {
Expand Down
16 changes: 14 additions & 2 deletions polars/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ impl<T: PolarsDataType> ChunkedArray<T> {
self.bit_settings.contains(Settings::SORTED_DSC)
}

pub fn unset_fast_explode_list(&mut self) {
self.bit_settings.remove(Settings::FAST_EXPLODE_LIST)
}

pub fn is_sorted_flag2(&self) -> IsSorted {
if self.is_sorted_flag() {
IsSorted::Ascending
Expand Down Expand Up @@ -319,7 +323,12 @@ impl<T: PolarsDataType> ChunkedArray<T> {
}

/// Create a new ChunkedArray from self, where the chunks are replaced.
fn copy_with_chunks(&self, chunks: Vec<ArrayRef>, keep_sorted: bool) -> Self {
fn copy_with_chunks(
&self,
chunks: Vec<ArrayRef>,
keep_sorted: bool,
keep_fast_explode: bool,
) -> Self {
let mut out = ChunkedArray {
field: self.field.clone(),
chunks,
Expand All @@ -331,6 +340,9 @@ impl<T: PolarsDataType> ChunkedArray<T> {
if !keep_sorted {
out.set_sorted_flag(IsSorted::Not);
}
if !keep_fast_explode {
out.unset_fast_explode_list()
}
out
}

Expand Down Expand Up @@ -390,7 +402,7 @@ impl<T: PolarsDataType> ChunkedArray<T> {
a.with_validity(validity)
})
.collect();
self.copy_with_chunks(chunks, true)
self.copy_with_chunks(chunks, true, false)
}

/// Get data type of ChunkedArray.
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/chunked_array/ops/chunkops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ impl<T: PolarsDataType> ChunkedArray<T> {
self.clone()
} else {
let chunks = inner_rechunk(&self.chunks);
self.copy_with_chunks(chunks, true)
self.copy_with_chunks(chunks, true, true)
}
}
}
Expand All @@ -117,7 +117,7 @@ impl<T: PolarsDataType> ChunkedArray<T> {
#[inline]
pub fn slice(&self, offset: i64, length: usize) -> Self {
let (chunks, len) = slice(&self.chunks, offset, length, self.len());
let mut out = self.copy_with_chunks(chunks, true);
let mut out = self.copy_with_chunks(chunks, true, true);
out.length = len as IdxSize;
out
}
Expand Down
8 changes: 4 additions & 4 deletions polars/polars-core/src/chunked_array/ops/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ where
.zip(filter.downcast_iter())
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();
Ok(self.copy_with_chunks(chunks, true))
Ok(self.copy_with_chunks(chunks, true, true))
}
}

Expand All @@ -67,7 +67,7 @@ impl ChunkFilter<BooleanType> for BooleanChunked {
.zip(filter.downcast_iter())
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();
Ok(self.copy_with_chunks(chunks, true))
Ok(self.copy_with_chunks(chunks, true, true))
}
}

Expand All @@ -89,7 +89,7 @@ impl ChunkFilter<Utf8Type> for Utf8Chunked {
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();

Ok(self.copy_with_chunks(chunks, true))
Ok(self.copy_with_chunks(chunks, true, true))
}
}

Expand All @@ -112,7 +112,7 @@ impl ChunkFilter<BinaryType> for BinaryChunked {
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();

Ok(self.copy_with_chunks(chunks, true))
Ok(self.copy_with_chunks(chunks, true, true))
}
}

Expand Down
36 changes: 23 additions & 13 deletions polars/polars-core/src/chunked_array/ops/take/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ macro_rules! take_opt_iter_n_chunks_unchecked {
}};
}

impl<T> ChunkedArray<T>
where
T: PolarsDataType,
{
fn finish_from_array(&self, array: Box<dyn Array>) -> Self {
let keep_fast_explode = array.null_count() == 0;
self.copy_with_chunks(vec![array], false, keep_fast_explode)
}
}

impl<T> ChunkTake for ChunkedArray<T>
where
T: PolarsNumericType,
Expand Down Expand Up @@ -97,7 +107,7 @@ where
}
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::Iter(iter) => {
if self.is_empty() {
Expand All @@ -118,7 +128,7 @@ where
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::IterNulls(iter) => {
if self.is_empty() {
Expand All @@ -139,7 +149,7 @@ where
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
}
}
Expand Down Expand Up @@ -188,7 +198,7 @@ impl ChunkTake for BooleanChunked {
}
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::Iter(iter) => {
if self.is_empty() {
Expand All @@ -205,7 +215,7 @@ impl ChunkTake for BooleanChunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::IterNulls(iter) => {
if self.is_empty() {
Expand All @@ -225,7 +235,7 @@ impl ChunkTake for BooleanChunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
}
}
Expand Down Expand Up @@ -274,7 +284,7 @@ impl ChunkTake for Utf8Chunked {
}
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::Iter(iter) => {
let array = match (self.has_validity(), self.chunks.len()) {
Expand All @@ -288,7 +298,7 @@ impl ChunkTake for Utf8Chunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::IterNulls(iter) => {
let array = match (self.has_validity(), self.chunks.len()) {
Expand All @@ -305,7 +315,7 @@ impl ChunkTake for Utf8Chunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
}
}
Expand Down Expand Up @@ -356,7 +366,7 @@ impl ChunkTake for BinaryChunked {
}
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::Iter(iter) => {
let array = match (self.has_validity(), self.chunks.len()) {
Expand All @@ -370,7 +380,7 @@ impl ChunkTake for BinaryChunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::IterNulls(iter) => {
let array = match (self.has_validity(), self.chunks.len()) {
Expand All @@ -387,7 +397,7 @@ impl ChunkTake for BinaryChunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
}
}
Expand Down Expand Up @@ -447,7 +457,7 @@ impl ChunkTake for ListChunked {
}
}
};
ca_self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
// todo! fast path for single chunk
TakeIdx::Iter(iter) => {
Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/unit/test_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,16 @@ def test_fast_explode_flag() -> None:
df1 = pl.DataFrame({"values": [[[1, 2]]]})
assert df1.clone().vstack(df1)["values"].flags["FAST_EXPLODE"]

# test take that produces a null in list
df = pl.DataFrame({"a": [1, 2, 1, 3]})
df_b = pl.DataFrame({"a": [1, 2], "c": [["1", "2", "c"], ["1", "2", "c"]]})
assert df_b["c"].flags["FAST_EXPLODE"]

# join produces a null
assert not (df.join(df_b, on=["a"], how="left").select(["c"]))["c"].flags[
"FAST_EXPLODE"
]


def test_list_amortized_apply_explode_5812() -> None:
s = pl.Series([None, [1, 3], [0, -3], [1, 2, 2]])
Expand Down

0 comments on commit 0263cbe

Please sign in to comment.