Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(rust, python): don't fast explode on null introducing take #6890

Merged
merged 1 commit into from
Feb 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ where
.zip(rhs.downcast_iter())
.map(|(lhs, rhs)| Box::new(kernel(lhs, rhs)) as ArrayRef)
.collect();
lhs.copy_with_chunks(chunks, false)
lhs.copy_with_chunks(chunks, false, false)
}
// broadcast right path
(_, 1) => {
Expand Down
16 changes: 14 additions & 2 deletions polars/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@ impl<T: PolarsDataType> ChunkedArray<T> {
self.bit_settings.contains(Settings::SORTED_DSC)
}

pub fn unset_fast_explode_list(&mut self) {
self.bit_settings.remove(Settings::FAST_EXPLODE_LIST)
}

pub fn is_sorted_flag2(&self) -> IsSorted {
if self.is_sorted_flag() {
IsSorted::Ascending
Expand Down Expand Up @@ -319,7 +323,12 @@ impl<T: PolarsDataType> ChunkedArray<T> {
}

/// Create a new ChunkedArray from self, where the chunks are replaced.
fn copy_with_chunks(&self, chunks: Vec<ArrayRef>, keep_sorted: bool) -> Self {
fn copy_with_chunks(
&self,
chunks: Vec<ArrayRef>,
keep_sorted: bool,
keep_fast_explode: bool,
) -> Self {
let mut out = ChunkedArray {
field: self.field.clone(),
chunks,
Expand All @@ -331,6 +340,9 @@ impl<T: PolarsDataType> ChunkedArray<T> {
if !keep_sorted {
out.set_sorted_flag(IsSorted::Not);
}
if !keep_fast_explode {
out.unset_fast_explode_list()
}
out
}

Expand Down Expand Up @@ -390,7 +402,7 @@ impl<T: PolarsDataType> ChunkedArray<T> {
a.with_validity(validity)
})
.collect();
self.copy_with_chunks(chunks, true)
self.copy_with_chunks(chunks, true, false)
}

/// Get data type of ChunkedArray.
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/chunked_array/ops/chunkops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ impl<T: PolarsDataType> ChunkedArray<T> {
self.clone()
} else {
let chunks = inner_rechunk(&self.chunks);
self.copy_with_chunks(chunks, true)
self.copy_with_chunks(chunks, true, true)
}
}
}
Expand All @@ -117,7 +117,7 @@ impl<T: PolarsDataType> ChunkedArray<T> {
#[inline]
pub fn slice(&self, offset: i64, length: usize) -> Self {
let (chunks, len) = slice(&self.chunks, offset, length, self.len());
let mut out = self.copy_with_chunks(chunks, true);
let mut out = self.copy_with_chunks(chunks, true, true);
out.length = len as IdxSize;
out
}
Expand Down
8 changes: 4 additions & 4 deletions polars/polars-core/src/chunked_array/ops/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ where
.zip(filter.downcast_iter())
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();
Ok(self.copy_with_chunks(chunks, true))
Ok(self.copy_with_chunks(chunks, true, true))
}
}

Expand All @@ -67,7 +67,7 @@ impl ChunkFilter<BooleanType> for BooleanChunked {
.zip(filter.downcast_iter())
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();
Ok(self.copy_with_chunks(chunks, true))
Ok(self.copy_with_chunks(chunks, true, true))
}
}

Expand All @@ -89,7 +89,7 @@ impl ChunkFilter<Utf8Type> for Utf8Chunked {
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();

Ok(self.copy_with_chunks(chunks, true))
Ok(self.copy_with_chunks(chunks, true, true))
}
}

Expand All @@ -112,7 +112,7 @@ impl ChunkFilter<BinaryType> for BinaryChunked {
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();

Ok(self.copy_with_chunks(chunks, true))
Ok(self.copy_with_chunks(chunks, true, true))
}
}

Expand Down
36 changes: 23 additions & 13 deletions polars/polars-core/src/chunked_array/ops/take/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ macro_rules! take_opt_iter_n_chunks_unchecked {
}};
}

impl<T> ChunkedArray<T>
where
T: PolarsDataType,
{
fn finish_from_array(&self, array: Box<dyn Array>) -> Self {
let keep_fast_explode = array.null_count() == 0;
self.copy_with_chunks(vec![array], false, keep_fast_explode)
}
}

impl<T> ChunkTake for ChunkedArray<T>
where
T: PolarsNumericType,
Expand Down Expand Up @@ -97,7 +107,7 @@ where
}
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::Iter(iter) => {
if self.is_empty() {
Expand All @@ -118,7 +128,7 @@ where
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::IterNulls(iter) => {
if self.is_empty() {
Expand All @@ -139,7 +149,7 @@ where
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
}
}
Expand Down Expand Up @@ -188,7 +198,7 @@ impl ChunkTake for BooleanChunked {
}
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::Iter(iter) => {
if self.is_empty() {
Expand All @@ -205,7 +215,7 @@ impl ChunkTake for BooleanChunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::IterNulls(iter) => {
if self.is_empty() {
Expand All @@ -225,7 +235,7 @@ impl ChunkTake for BooleanChunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
}
}
Expand Down Expand Up @@ -274,7 +284,7 @@ impl ChunkTake for Utf8Chunked {
}
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::Iter(iter) => {
let array = match (self.has_validity(), self.chunks.len()) {
Expand All @@ -288,7 +298,7 @@ impl ChunkTake for Utf8Chunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::IterNulls(iter) => {
let array = match (self.has_validity(), self.chunks.len()) {
Expand All @@ -305,7 +315,7 @@ impl ChunkTake for Utf8Chunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
}
}
Expand Down Expand Up @@ -356,7 +366,7 @@ impl ChunkTake for BinaryChunked {
}
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::Iter(iter) => {
let array = match (self.has_validity(), self.chunks.len()) {
Expand All @@ -370,7 +380,7 @@ impl ChunkTake for BinaryChunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
TakeIdx::IterNulls(iter) => {
let array = match (self.has_validity(), self.chunks.len()) {
Expand All @@ -387,7 +397,7 @@ impl ChunkTake for BinaryChunked {
return ca;
}
};
self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
}
}
Expand Down Expand Up @@ -447,7 +457,7 @@ impl ChunkTake for ListChunked {
}
}
};
ca_self.copy_with_chunks(vec![array], false)
self.finish_from_array(array)
}
// todo! fast path for single chunk
TakeIdx::Iter(iter) => {
Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/unit/test_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,16 @@ def test_fast_explode_flag() -> None:
df1 = pl.DataFrame({"values": [[[1, 2]]]})
assert df1.clone().vstack(df1)["values"].flags["FAST_EXPLODE"]

# test take that produces a null in list
df = pl.DataFrame({"a": [1, 2, 1, 3]})
df_b = pl.DataFrame({"a": [1, 2], "c": [["1", "2", "c"], ["1", "2", "c"]]})
assert df_b["c"].flags["FAST_EXPLODE"]

# join produces a null
assert not (df.join(df_b, on=["a"], how="left").select(["c"]))["c"].flags[
"FAST_EXPLODE"
]


def test_list_amortized_apply_explode_5812() -> None:
s = pl.Series([None, [1, 3], [0, -3], [1, 2, 2]])
Expand Down