Skip to content

Commit

Permalink
elide bound checks in boolean take_unchecked (#4274)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Aug 5, 2022
1 parent 833e588 commit cdc8554
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 8 deletions.
1 change: 1 addition & 0 deletions polars/polars-arrow/src/compute/mod.rs
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pub mod cast;
pub mod take;
88 changes: 88 additions & 0 deletions polars/polars-arrow/src/compute/take/boolean.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
use crate::index::IdxSize;
use arrow::{
array::{Array, BooleanArray, PrimitiveArray},
bitmap::{Bitmap, MutableBitmap},
};

unsafe fn take_values(values: &Bitmap, indices: &[IdxSize]) -> Bitmap {
let values = indices.iter().map(|&index| {
debug_assert!((index as usize) < values.len());
values.get_bit_unchecked(index as usize)
});
Bitmap::from_trusted_len_iter(values)
}

// take implementation when neither values nor indices contain nulls
unsafe fn take_no_validity(values: &Bitmap, indices: &[IdxSize]) -> (Bitmap, Option<Bitmap>) {
(take_values(values, indices), None)
}

// take implementation when only values contain nulls
unsafe fn take_values_validity(
values: &BooleanArray,
indices: &[IdxSize],
) -> (Bitmap, Option<Bitmap>) {
let validity_values = values.validity().unwrap();
let validity = take_values(validity_values, indices);

let values_values = values.values();
let buffer = take_values(values_values, indices);

(buffer, validity.into())
}

// take implementation when only indices contain nulls
unsafe fn take_indices_validity(
values: &Bitmap,
indices: &PrimitiveArray<IdxSize>,
) -> (Bitmap, Option<Bitmap>) {
// simply take all and copy the bitmap
let buffer = take_values(values, indices.values());

(buffer, indices.validity().cloned())
}

// take implementation when both values and indices contain nulls
unsafe fn take_values_indices_validity(
values: &BooleanArray,
indices: &PrimitiveArray<IdxSize>,
) -> (Bitmap, Option<Bitmap>) {
let mut validity = MutableBitmap::with_capacity(indices.len());

let values_validity = values.validity().unwrap();

let values_values = values.values();
let values = indices.iter().map(|index| match index {
Some(&index) => {
let index = index as usize;
debug_assert!(index < values.len());
validity.push(values_validity.get_bit_unchecked(index));
values_values.get_bit_unchecked(index)
}
None => {
validity.push(false);
false
}
});
let values = Bitmap::from_trusted_len_iter(values);
(values, validity.into())
}

/// `take` implementation for boolean arrays
pub unsafe fn take_unchecked(
values: &BooleanArray,
indices: &PrimitiveArray<IdxSize>,
) -> BooleanArray {
let data_type = values.data_type().clone();
let indices_has_validity = indices.null_count() > 0;
let values_has_validity = values.null_count() > 0;

let (values, validity) = match (values_has_validity, indices_has_validity) {
(false, false) => take_no_validity(values.values(), indices.values()),
(true, false) => take_values_validity(values, indices.values()),
(false, true) => take_indices_validity(values.values(), indices),
(true, true) => take_values_indices_validity(values, indices),
};

BooleanArray::new(data_type, values, validity)
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
mod boolean;

use crate::trusted_len::{PushUnchecked, TrustedLen};
use crate::utils::with_match_primitive_type;
use crate::{bit_util::unset_bit_raw, prelude::*, utils::CustomIterTools};
Expand All @@ -24,6 +26,10 @@ pub unsafe fn take_unchecked(arr: &dyn Array, idx: &IdxArr) -> ArrayRef {
let arr = arr.as_any().downcast_ref().unwrap();
take_utf8_unchecked(arr, idx)
}
Boolean => {
let arr = arr.as_any().downcast_ref().unwrap();
Box::new(boolean::take_unchecked(arr, idx))
}
// TODO! implement proper unchecked version
#[cfg(feature = "compute")]
_ => {
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-arrow/src/kernels/list.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::kernels::take::take_unchecked;
use crate::compute::take::take_unchecked;
use crate::prelude::*;
use crate::trusted_len::PushUnchecked;
use crate::utils::CustomIterTools;
Expand Down
1 change: 0 additions & 1 deletion polars/polars-arrow/src/kernels/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ pub mod sort_partition;
pub mod sorted_join;
#[cfg(feature = "strings")]
pub mod string;
pub mod take;
pub mod take_agg;

/// Internal state of [SlicesIterator]
Expand Down
1 change: 0 additions & 1 deletion polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ features = [
"compute_concatenate",
"compute_filter",
"compute_if_then_else",
"compute_take",
]

[dev-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/kernels/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::prelude::*;
use arrow::bitmap::MutableBitmap;
use polars_arrow::array::PolarsArray;
use polars_arrow::bit_util::unset_bit_raw;
use polars_arrow::kernels::take::take_value_indices_from_list;
use polars_arrow::compute::take::take_value_indices_from_list;
use std::convert::TryFrom;

/// Take kernel for multiple chunks. We directly return a ChunkedArray because that path chooses the fastest collection path.
Expand Down
6 changes: 2 additions & 4 deletions polars/polars-core/src/chunked_array/ops/take/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
//! IntoTakeRandom provides structs that implement the TakeRandom trait.
//! There are several structs that implement the fastest path for random access.
//!

use arrow::compute::take::take;
use polars_arrow::kernels::take::*;
use polars_arrow::compute::take::*;

use crate::chunked_array::kernels::take::*;

Expand Down Expand Up @@ -175,7 +173,7 @@ impl ChunkTake for BooleanChunked {
return Self::full_null(self.name(), array.len());
}
let array = match self.chunks.len() {
1 => take::take(chunks.next().unwrap(), array).unwrap(),
1 => take::take_unchecked(chunks.next().unwrap(), array),
_ => {
return if !array.has_validity() {
let iter = array.values().iter().map(|i| *i as usize);
Expand Down

0 comments on commit cdc8554

Please sign in to comment.