Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Replaced by intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Mar 5, 2022
1 parent 2990e98 commit fb754d7
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 140 deletions.
4 changes: 2 additions & 2 deletions src/types/simd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use super::{days_ms, months_days_ns};
use super::{BitChunk, BitChunkIter, NativeType};

/// Describes the ability to convert itself from a [`BitChunk`].
pub trait FromMaskChunk<T> {
/// Convert itself from a slice.
pub trait FromMaskChunk<T: BitChunk> {
/// Convert itself from [`BitChunk`].
fn from_chunk(v: T) -> Self;
}

Expand Down
147 changes: 9 additions & 138 deletions src/types/simd/packed.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
pub use std::simd::{
f32x16, f32x8, f64x8, i16x32, i16x8, i32x16, i32x8, i64x8, i8x64, i8x8, mask32x16 as m32x16,
mask64x8 as m64x8, mask8x64 as m8x64, u16x32, u16x8, u32x16, u32x8, u64x8, u8x64, u8x8,
f32x16, f32x8, f64x8, i16x32, i16x8, i32x16, i32x8, i64x8, i8x64, i8x8, mask16x32 as m16x32,
mask32x16 as m32x16, mask64x8 as m64x8, mask8x64 as m8x64, u16x32, u16x8, u32x16, u32x8, u64x8,
u8x64, u8x8, ToBitMask,
};

/// Mask with 32 lanes whose element type is the 16-bit `i16`.
// The lower-case name is deliberate: it follows the `m32x16` / `m64x8` / `m8x64` aliases
// introduced by the `pub use std::simd::{..}` list at the top of this file.
#[allow(non_camel_case_types)]
pub type m16x32 = std::simd::Mask<i16, 32>;

use super::*;

macro_rules! simd {
Expand Down Expand Up @@ -54,143 +51,17 @@ simd!(f32x16, f32, 16, u16, m32x16);
simd!(f64x8, f64, 8, u8, m64x8);

macro_rules! chunk_macro {
($type:ty, $chunk:ty, $simd:ty, $mask:tt, $m:expr) => {
($type:ty, $chunk:ty, $simd:ty, $mask:tt) => {
impl FromMaskChunk<$chunk> for $mask {
#[inline]
fn from_chunk(chunk: $chunk) -> Self {
($m)(chunk)
Self::from_bitmask(chunk)
}
}
};
}

chunk_macro!(u8, u64, u8x64, m8x64, from_chunk_u64);
chunk_macro!(u16, u32, u16x32, m16x32, from_chunk_u32);
chunk_macro!(u32, u16, u32x16, m32x16, from_chunk_u16);
chunk_macro!(u64, u8, u64x8, m64x8, from_chunk_u8);

/// Expands the 8 bits of `chunk` into an 8-lane mask.
///
/// Lane `i` of the result is set exactly when bit `i` (value `1 << i`) of `chunk` is set.
#[inline]
fn from_chunk_u8(chunk: u8) -> m64x8 {
    // One distinct bit per lane: lane `i` holds `1 << i`.
    let mut bits = [0u64; 8];
    for (lane, bit) in bits.iter_mut().enumerate() {
        *bit = 1u64 << lane;
    }
    let lane_bits = u64x8::from_array(bits);

    // Every lane sees the whole chunk, widened losslessly to the lane width.
    let broadcast = u64x8::splat(u64::from(chunk));

    // A lane's bit survives the AND unchanged only if that bit is present in `chunk`.
    (broadcast & lane_bits).lanes_eq(lane_bits)
}

/// Expands the 16 bits of `chunk` into a 16-lane mask.
///
/// Lane `i` of the result is set exactly when bit `i` (value `1 << i`) of `chunk` is set.
#[inline]
fn from_chunk_u16(chunk: u16) -> m32x16 {
    // One distinct bit per lane: lane `i` holds `1 << i` (1, 2, 4, ..., 32768).
    let mut bits = [0u32; 16];
    for (lane, bit) in bits.iter_mut().enumerate() {
        *bit = 1u32 << lane;
    }
    let lane_bits = u32x16::from_array(bits);

    // Every lane sees the whole chunk, widened losslessly to the lane width.
    let broadcast = u32x16::splat(u32::from(chunk));

    // A lane's bit survives the AND unchanged only if that bit is present in `chunk`.
    (broadcast & lane_bits).lanes_eq(lane_bits)
}

/// Expands the 32 bits of `chunk` into a 32-lane mask.
///
/// Lane `k` of the result is set exactly when bit `k % 8` of the `k / 8`-th native-order
/// byte of `chunk` is set. This is the same byte-wise convention `from_chunk_u64` follows;
/// on little-endian targets it means lane `k` mirrors bit `k` of `chunk`, in line with
/// `from_chunk_u8` and `from_chunk_u16`.
///
/// The two 16-bit halves must not be exchanged: lanes `0..16` are driven by bytes 0 and 1,
/// lanes `16..32` by bytes 2 and 3.
#[inline]
fn from_chunk_u32(chunk: u32) -> m16x32 {
    let bytes = chunk.to_ne_bytes();

    // `lane_bit[k]` is the single bit that lane `k` tests; `lane_byte[k]` is the byte of
    // `chunk` that the bit is looked up in (widened losslessly to the lane width).
    let mut lane_bit = [0u16; 32];
    let mut lane_byte = [0u16; 32];
    for (lane, (bit, byte)) in lane_bit.iter_mut().zip(lane_byte.iter_mut()).enumerate() {
        *bit = 1u16 << (lane % 8);
        *byte = u16::from(bytes[lane / 8]);
    }

    let lane_bit = u16x32::from_array(lane_bit);
    let lane_byte = u16x32::from_array(lane_byte);

    // A lane's bit survives the AND unchanged only if it is present in the lane's byte.
    (lane_bit & lane_byte).lanes_eq(lane_bit)
}

#[inline]
fn from_chunk_u64(chunk: u64) -> m8x64 {
let idx = u8x64::from_array([
1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, 1,
2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, 1, 2,
4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128,
]);
let idxs = [
u8x64::from_chunk(&[
1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
]),
u8x64::from_chunk(&[
0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
]),
u8x64::from_chunk(&[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
]),
u8x64::from_chunk(&[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16,
32, 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0,
]),
u8x64::from_chunk(&[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
]),
u8x64::from_chunk(&[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
]),
u8x64::from_chunk(&[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128,
0, 0, 0, 0, 0, 0, 0, 0,
]),
u8x64::from_chunk(&[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
4, 8, 16, 32, 64, 128,
]),
];

let a = chunk.to_ne_bytes();

let mut result = m8x64::default();
for i in 0..8 {
result |= (idxs[i] & u8x64::splat(a[i])).lanes_eq(idx)
}

result
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every byte of the 32-bit chunk is `0x01`, so exactly the lanes at multiples of 8 are set.
    #[test]
    fn test_basic1() {
        let mask = from_chunk_u32(0x0101_0101u32);
        for lane in 0..32 {
            let expected = lane % 8 == 0;
            assert_eq!(mask.test(lane), expected)
        }
    }

    /// Every byte of the 64-bit chunk is `0x01`, so exactly the lanes at multiples of 8 are set.
    #[test]
    fn test_basic2() {
        let mask = from_chunk_u64(0x0101_0101_0101_0101u64);
        for lane in 0..64 {
            let expected = lane % 8 == 0;
            assert_eq!(mask.test(lane), expected)
        }
    }
}
chunk_macro!(u8, u64, u8x64, m8x64);
chunk_macro!(u16, u32, u16x32, m16x32);
chunk_macro!(u32, u16, u32x16, m32x16);
chunk_macro!(u64, u8, u64x8, m64x8);

0 comments on commit fb754d7

Please sign in to comment.