From fb754d7c36beeabf20559ea4a525f83d6d4765d1 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sat, 5 Mar 2022 15:49:47 +0000 Subject: [PATCH] Replaced by intrinsics --- src/types/simd/mod.rs | 4 +- src/types/simd/packed.rs | 147 +++------------------------------------ 2 files changed, 11 insertions(+), 140 deletions(-) diff --git a/src/types/simd/mod.rs b/src/types/simd/mod.rs index 9db728b565e..87dbd980e9d 100644 --- a/src/types/simd/mod.rs +++ b/src/types/simd/mod.rs @@ -5,8 +5,8 @@ use super::{days_ms, months_days_ns}; use super::{BitChunk, BitChunkIter, NativeType}; /// Describes the ability to convert itself from a [`BitChunk`]. -pub trait FromMaskChunk { - /// Convert itself from a slice. +pub trait FromMaskChunk { + /// Convert itself from [`BitChunk`]. fn from_chunk(v: T) -> Self; } diff --git a/src/types/simd/packed.rs b/src/types/simd/packed.rs index 0c03327212a..8ace22da094 100644 --- a/src/types/simd/packed.rs +++ b/src/types/simd/packed.rs @@ -1,12 +1,9 @@ pub use std::simd::{ - f32x16, f32x8, f64x8, i16x32, i16x8, i32x16, i32x8, i64x8, i8x64, i8x8, mask32x16 as m32x16, - mask64x8 as m64x8, mask8x64 as m8x64, u16x32, u16x8, u32x16, u32x8, u64x8, u8x64, u8x8, + f32x16, f32x8, f64x8, i16x32, i16x8, i32x16, i32x8, i64x8, i8x64, i8x8, mask16x32 as m16x32, + mask32x16 as m32x16, mask64x8 as m64x8, mask8x64 as m8x64, u16x32, u16x8, u32x16, u32x8, u64x8, + u8x64, u8x8, ToBitMask, }; -/// Vector of 32 16-bit masks -#[allow(non_camel_case_types)] -pub type m16x32 = std::simd::Mask; - use super::*; macro_rules! simd { @@ -54,143 +51,17 @@ simd!(f32x16, f32, 16, u16, m32x16); simd!(f64x8, f64, 8, u8, m64x8); macro_rules! chunk_macro { - ($type:ty, $chunk:ty, $simd:ty, $mask:tt, $m:expr) => { + ($type:ty, $chunk:ty, $simd:ty, $mask:tt) => { impl FromMaskChunk<$chunk> for $mask { #[inline] fn from_chunk(chunk: $chunk) -> Self { - ($m)(chunk) + Self::from_bitmask(chunk) } } }; } -chunk_macro!(u8, u64, u8x64, m8x64, from_chunk_u64); -chunk_macro!(u16, u32, u16x32, m16x32, from_chunk_u32); -chunk_macro!(u32, u16, u32x16, m32x16, from_chunk_u16); -chunk_macro!(u64, u8, u64x8, m64x8, from_chunk_u8); - -#[inline] -fn from_chunk_u8(chunk: u8) -> m64x8 { - let idx = u64x8::from_array([1, 2, 4, 8, 16, 32, 64, 128]); - let vecmask = u64x8::splat(chunk as u64); - - (idx & vecmask).lanes_eq(idx) -} - -#[inline] -fn from_chunk_u16(chunk: u16) -> m32x16 { - let idx = u32x16::from_array([ - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, - ]); - let vecmask = u32x16::splat(chunk as u32); - - (idx & vecmask).lanes_eq(idx) -} - -#[inline] -fn from_chunk_u32(chunk: u32) -> m16x32 { - let idx = u16x32::from_array([ - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 1, 2, 4, 8, - 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, - ]); - let left = u16x32::from_chunk(&[ - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ]); - let right = u16x32::from_chunk(&[ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, - 1024, 2048, 4096, 8192, 16384, 32768, - ]); - - let a = chunk.to_ne_bytes(); - let a1 = u16::from_ne_bytes([a[2], a[3]]); - let a2 = u16::from_ne_bytes([a[0], a[1]]); - - let vecmask1 = u16x32::splat(a1); - let vecmask2 = u16x32::splat(a2); - - (idx & left & vecmask1).lanes_eq(idx) | (idx & right & vecmask2).lanes_eq(idx) -} - -#[inline] -fn from_chunk_u64(chunk: u64) -> m8x64 { - let idx = u8x64::from_array([ - 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, 1, - 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, - 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, - ]); - let idxs = [ - u8x64::from_chunk(&[ - 1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ]), - u8x64::from_chunk(&[ - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ]), - u8x64::from_chunk(&[ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ]), - u8x64::from_chunk(&[ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16, - 32, 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - ]), - u8x64::from_chunk(&[ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ]), - u8x64::from_chunk(&[ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - ]), - u8x64::from_chunk(&[ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 8, 16, 32, 64, 128, - 0, 0, 0, 0, 0, 0, 0, 0, - ]), - u8x64::from_chunk(&[ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, - 4, 8, 16, 32, 64, 128, - ]), - ]; - - let a = chunk.to_ne_bytes(); - - let mut result = m8x64::default(); - for i in 0..8 { - result |= (idxs[i] & u8x64::splat(a[i])).lanes_eq(idx) - } - - result -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_basic1() { - let a = 0b00000001000000010000000100000001u32; - let a = from_chunk_u32(a); - for i in 0..32 { - assert_eq!(a.test(i), i % 8 == 0) - } - } - - #[test] - fn test_basic2() { - let a = 0b0000000100000001000000010000000100000001000000010000000100000001u64; - let a = from_chunk_u64(a); - for i in 0..64 { - assert_eq!(a.test(i), i % 8 == 0) - } - } -} +chunk_macro!(u8, u64, u8x64, m8x64); +chunk_macro!(u16, u32, u16x32, m16x32); +chunk_macro!(u32, u16, u32x16, m32x16); +chunk_macro!(u64, u8, u64x8, m64x8);