Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Migrated to portable simd (#747)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Mar 5, 2022
1 parent c999595 commit 25b8ef8
Show file tree
Hide file tree
Showing 12 changed files with 99 additions and 71 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2021-12-10
toolchain: nightly-2022-03-03
override: true
- uses: Swatinem/rust-cache@v1
with:
Expand All @@ -99,7 +99,7 @@ jobs:
submodules: true # needed to test IPC, which are located in a submodule
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2021-12-10
toolchain: nightly-2022-03-03
override: true
- uses: Swatinem/rust-cache@v1
with:
Expand Down Expand Up @@ -189,7 +189,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2022-01-17
toolchain: nightly-2022-03-03
override: true
- uses: Swatinem/rust-cache@v1
- name: Run
Expand Down
4 changes: 1 addition & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ itertools = { version = "^0.10", optional = true }

base64 = { version = "0.13.0", optional = true }

packed_simd = { version = "0.3", optional = true, package = "packed_simd_2" }

# to write to parquet as a stream
futures = { version = "0.3", optional = true }

Expand Down Expand Up @@ -216,8 +214,8 @@ compute = [
"compute_window"
]
benchmarks = ["rand"]
simd = ["packed_simd"]
serde_types = ["serde", "serde_derive"]
simd = []

[package.metadata.cargo-all-features]
allowlist = ["compute", "compute_sort", "compute_hash", "compute_nullif"]
Expand Down
15 changes: 12 additions & 3 deletions benches/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ use arrow2::array::*;
use arrow2::compute::aggregate::*;
use arrow2::util::bench_util::*;

fn bench_sum(arr_a: &PrimitiveArray<f32>) {
fn bench_sum(arr_a: &dyn Array) {
sum(criterion::black_box(arr_a)).unwrap();
}

fn bench_min(arr_a: &PrimitiveArray<f32>) {
min_primitive(criterion::black_box(arr_a)).unwrap();
fn bench_min(arr_a: &dyn Array) {
min(criterion::black_box(arr_a)).unwrap();
}

fn add_benchmark(c: &mut Criterion) {
Expand All @@ -24,6 +24,15 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| bench_min(&arr_a))
});

let arr_a = create_primitive_array::<i32>(size, 0.0);

c.bench_function(&format!("sum 2^{} i32", log2_size), |b| {
b.iter(|| bench_sum(&arr_a))
});
c.bench_function(&format!("min 2^{} i32", log2_size), |b| {
b.iter(|| bench_min(&arr_a))
});

let arr_a = create_primitive_array::<f32>(size, 0.1);

c.bench_function(&format!("sum null 2^{} f32", log2_size), |b| {
Expand Down
20 changes: 10 additions & 10 deletions src/compute/aggregate/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ pub trait SimdOrd<T> {
/// reduce itself to the minimum
fn min_element(self) -> T;
/// lane-wise maximum between two instances
fn max(self, x: Self) -> Self;
fn max_lane(self, x: Self) -> Self;
/// lane-wise minimum between two instances
fn min(self, x: Self) -> Self;
fn min_lane(self, x: Self) -> Self;
/// returns a new instance with all lanes equal to `MIN`
fn new_min() -> Self;
/// returns a new instance with all lanes equal to `MAX`
Expand Down Expand Up @@ -120,11 +120,11 @@ where

let chunk_reduced = chunks.fold(T::Simd::new_min(), |acc, chunk| {
let chunk = T::Simd::from_chunk(chunk);
acc.min(chunk)
acc.min_lane(chunk)
});

let remainder = T::Simd::from_incomplete_chunk(remainder, T::Simd::MAX);
let reduced = chunk_reduced.min(remainder);
let reduced = chunk_reduced.min_lane(remainder);

reduced.min_element()
}
Expand All @@ -143,14 +143,14 @@ where
let chunk = T::Simd::from_chunk(chunk);
let mask = <T::Simd as NativeSimd>::Mask::from_chunk(validity_chunk);
let chunk = chunk.select(mask, T::Simd::new_min());
acc.min(chunk)
acc.min_lane(chunk)
},
);

let remainder = T::Simd::from_incomplete_chunk(chunks.remainder(), T::Simd::MAX);
let mask = <T::Simd as NativeSimd>::Mask::from_chunk(validity_masks.remainder());
let remainder = remainder.select(mask, T::Simd::new_min());
let reduced = chunk_reduced.min(remainder);
let reduced = chunk_reduced.min_lane(remainder);

reduced.min_element()
}
Expand Down Expand Up @@ -199,11 +199,11 @@ where

let chunk_reduced = chunks.fold(T::Simd::new_max(), |acc, chunk| {
let chunk = T::Simd::from_chunk(chunk);
acc.max(chunk)
acc.max_lane(chunk)
});

let remainder = T::Simd::from_incomplete_chunk(remainder, T::Simd::MIN);
let reduced = chunk_reduced.max(remainder);
let reduced = chunk_reduced.max_lane(remainder);

reduced.max_element()
}
Expand All @@ -222,14 +222,14 @@ where
let chunk = T::Simd::from_chunk(chunk);
let mask = <T::Simd as NativeSimd>::Mask::from_chunk(validity_chunk);
let chunk = chunk.select(mask, T::Simd::new_max());
acc.max(chunk)
acc.max_lane(chunk)
},
);

let remainder = T::Simd::from_incomplete_chunk(chunks.remainder(), T::Simd::MIN);
let mask = <T::Simd as NativeSimd>::Mask::from_chunk(validity_masks.remainder());
let remainder = remainder.select(mask, T::Simd::new_max());
let reduced = chunk_reduced.max(remainder);
let reduced = chunk_reduced.max_lane(remainder);

reduced.max_element()
}
Expand Down
8 changes: 4 additions & 4 deletions src/compute/aggregate/simd/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ macro_rules! simd_ord_int {
}

#[inline]
fn max(self, x: Self) -> Self {
fn max_lane(self, x: Self) -> Self {
let mut result = <$simd>::default();
result
.0
Expand All @@ -82,7 +82,7 @@ macro_rules! simd_ord_int {
}

#[inline]
fn min(self, x: Self) -> Self {
fn min_lane(self, x: Self) -> Self {
let mut result = <$simd>::default();
result
.0
Expand Down Expand Up @@ -123,7 +123,7 @@ macro_rules! simd_ord_float {
}

#[inline]
fn max(self, x: Self) -> Self {
fn max_lane(self, x: Self) -> Self {
let mut result = <$simd>::default();
result
.0
Expand All @@ -135,7 +135,7 @@ macro_rules! simd_ord_float {
}

#[inline]
fn min(self, x: Self) -> Self {
fn min_lane(self, x: Self) -> Self {
let mut result = <$simd>::default();
result
.0
Expand Down
36 changes: 18 additions & 18 deletions src/compute/aggregate/simd/packed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@ macro_rules! simd_sum {
};
}

simd_sum!(f32x16, f32, sum);
simd_sum!(f64x8, f64, sum);
simd_sum!(u8x64, u8, wrapping_sum);
simd_sum!(u16x32, u16, wrapping_sum);
simd_sum!(u32x16, u32, wrapping_sum);
simd_sum!(u64x8, u64, wrapping_sum);
simd_sum!(i8x64, i8, wrapping_sum);
simd_sum!(i16x32, i16, wrapping_sum);
simd_sum!(i32x16, i32, wrapping_sum);
simd_sum!(i64x8, i64, wrapping_sum);
simd_sum!(f32x16, f32, horizontal_sum);
simd_sum!(f64x8, f64, horizontal_sum);
simd_sum!(u8x64, u8, horizontal_sum);
simd_sum!(u16x32, u16, horizontal_sum);
simd_sum!(u32x16, u32, horizontal_sum);
simd_sum!(u64x8, u64, horizontal_sum);
simd_sum!(i8x64, i8, horizontal_sum);
simd_sum!(i16x32, i16, horizontal_sum);
simd_sum!(i32x16, i32, horizontal_sum);
simd_sum!(i64x8, i64, horizontal_sum);

macro_rules! simd_ord_int {
($simd:tt, $type:ty) => {
Expand All @@ -33,21 +33,21 @@ macro_rules! simd_ord_int {

#[inline]
fn max_element(self) -> $type {
self.max_element()
self.horizontal_max()
}

#[inline]
fn min_element(self) -> $type {
self.min_element()
self.horizontal_min()
}

#[inline]
fn max(self, x: Self) -> Self {
fn max_lane(self, x: Self) -> Self {
self.max(x)
}

#[inline]
fn min(self, x: Self) -> Self {
fn min_lane(self, x: Self) -> Self {
self.min(x)
}

Expand All @@ -72,21 +72,21 @@ macro_rules! simd_ord_float {

#[inline]
fn max_element(self) -> $type {
self.max_element()
self.horizontal_max()
}

#[inline]
fn min_element(self) -> $type {
self.min_element()
self.horizontal_min()
}

#[inline]
fn max(self, x: Self) -> Self {
fn max_lane(self, x: Self) -> Self {
self.max(x)
}

#[inline]
fn min(self, x: Self) -> Self {
fn min_lane(self, x: Self) -> Self {
self.min(x)
}

Expand Down
20 changes: 10 additions & 10 deletions src/compute/comparison/simd/packed.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::convert::TryInto;
use std::simd::ToBitMask;

use packed_simd::*;

use crate::types::simd::*;
use crate::types::{days_ms, months_days_ns};

use super::*;
Expand All @@ -15,48 +15,48 @@ macro_rules! simd8 {
impl Simd8Lanes<$type> for $md {
#[inline]
fn from_chunk(v: &[$type]) -> Self {
<$md>::from_slice_unaligned(v)
<$md>::from_slice(v)
}

#[inline]
fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self {
let mut a = [remaining; 8];
a.iter_mut().zip(v.iter()).for_each(|(a, b)| *a = *b);
Self::from_chunk(a.as_ref())
Self::from_array(a)
}
}

impl Simd8PartialEq for $md {
#[inline]
fn eq(self, other: Self) -> u8 {
self.eq(other).bitmask()
self.lanes_eq(other).to_bitmask()
}

#[inline]
fn neq(self, other: Self) -> u8 {
self.ne(other).bitmask()
self.lanes_ne(other).to_bitmask()
}
}

impl Simd8PartialOrd for $md {
#[inline]
fn lt_eq(self, other: Self) -> u8 {
self.le(other).bitmask()
self.lanes_le(other).to_bitmask()
}

#[inline]
fn lt(self, other: Self) -> u8 {
self.lt(other).bitmask()
self.lanes_lt(other).to_bitmask()
}

#[inline]
fn gt_eq(self, other: Self) -> u8 {
self.ge(other).bitmask()
self.lanes_ge(other).to_bitmask()
}

#[inline]
fn gt(self, other: Self) -> u8 {
self.gt(other).bitmask()
self.lanes_gt(other).to_bitmask()
}
}
};
Expand Down
2 changes: 1 addition & 1 deletion src/doc/lib.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,5 @@ functionality, such as:
* `compute` to operate on arrays (addition, sum, sort, etc.)

The feature `simd` (not part of `full`) produces more explicit SIMD instructions
via [`packed_simd`](https://github.com/rust-lang/packed_simd), but requires the
via [`std::simd`](https://doc.rust-lang.org/nightly/std/simd/index.html), but requires the
nightly channel.
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//
#![allow(clippy::len_without_is_empty)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(feature = "simd", feature(portable_simd))]

#[macro_use]
pub mod array;
Expand Down
4 changes: 2 additions & 2 deletions src/types/simd/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Contains traits and implementations of multi-data used in SIMD.
//! The actual representation is driven by the feature flag `"simd"`, which, if set,
//! uses `packed_simd2` to get the intrinsics.
//! uses [`std::simd`].
use super::{days_ms, months_days_ns};
use super::{BitChunk, BitChunkIter, NativeType};

Expand All @@ -14,7 +14,7 @@ pub trait FromMaskChunk<T> {
/// # Safety
/// The `NativeType` and the `NativeSimd` must have a matching alignment.
/// e.g. slicing `&[NativeType]` by `align_of<NativeSimd>()` must be properly aligned/safe.
pub unsafe trait NativeSimd: Default + Copy {
pub unsafe trait NativeSimd: Sized + Default + Copy {
/// Number of lanes
const LANES: usize;
/// The [`NativeType`] of this struct. E.g. `f32` for a `NativeSimd = f32x16`.
Expand Down
Loading

0 comments on commit 25b8ef8

Please sign in to comment.