From 44f0690dc87aa55085556847016be7dec7a090a9 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Mon, 21 Oct 2024 15:20:06 +0200 Subject: [PATCH] move transmuting to separate PR --- crates/polars-arrow/src/buffer/immutable.rs | 2 +- crates/polars-arrow/src/storage.rs | 129 +----------------- .../polars-arrow/src/types/aligned_bytes.rs | 63 ++------- 3 files changed, 17 insertions(+), 177 deletions(-) diff --git a/crates/polars-arrow/src/buffer/immutable.rs b/crates/polars-arrow/src/buffer/immutable.rs index eb27a62341b9..1c6e5b5aa4ff 100644 --- a/crates/polars-arrow/src/buffer/immutable.rs +++ b/crates/polars-arrow/src/buffer/immutable.rs @@ -79,7 +79,7 @@ impl Buffer { } /// Auxiliary method to create a new Buffer - pub fn from_storage(storage: SharedStorage) -> Self { + pub(crate) fn from_storage(storage: SharedStorage) -> Self { let ptr = storage.as_ptr(); let length = storage.len(); Buffer { diff --git a/crates/polars-arrow/src/storage.rs b/crates/polars-arrow/src/storage.rs index 5f811d214ca0..e7656f9d880b 100644 --- a/crates/polars-arrow/src/storage.rs +++ b/crates/polars-arrow/src/storage.rs @@ -4,28 +4,10 @@ use std::ops::Deref; use std::ptr::NonNull; use std::sync::atomic::{AtomicU64, Ordering}; -use bytemuck::Pod; - use crate::ffi::InternalArrowArray; -use crate::types::{ - AlignedBytes, Bytes12Alignment4, Bytes16Alignment16, Bytes16Alignment4, Bytes16Alignment8, - Bytes1Alignment1, Bytes2Alignment2, Bytes32Alignment16, Bytes4Alignment4, Bytes8Alignment4, - Bytes8Alignment8, NativeSizeAlignment, -}; enum BackingStorage { - Vec { - capacity: usize, - - /// Size and alignment of the original vector type. - /// - /// We have the following invariants: - /// - if this is Some(...) then all alignments involved are a power of 2 - /// - align_of(Original) >= align_of(Current) - /// - size_of(Original) >= size_of(Current) - /// - size_of(Original) % size_of(Current) == 0 - original_element_size_alignment: Option, - }, + Vec { capacity: usize }, InternalArrowArray(InternalArrowArray), } @@ -42,53 +24,8 @@ impl Drop for SharedStorageInner { fn drop(&mut self) { match self.backing.take() { Some(BackingStorage::InternalArrowArray(a)) => drop(a), - Some(BackingStorage::Vec { - capacity, - original_element_size_alignment, - }) => { - #[inline] - unsafe fn drop_vec(ptr: *mut T, length: usize, capacity: usize) { - let ptr = ptr.cast::(); - debug_assert!(ptr.is_aligned()); - - debug_assert!(size_of::() >= size_of::()); - debug_assert_eq!(size_of::() % size_of::(), 0); - - let scale_factor = size_of::() / size_of::(); - - // If the original element had a different size_of we need to rescale the - // length and capacity here. - let length = length / scale_factor; - let capacity = capacity / scale_factor; - - // SAFETY: - // - The BackingStorage holds an invariants that make this safe - drop(unsafe { Vec::from_raw_parts(ptr, length, capacity) }); - } - - let ptr = self.ptr; - let length = self.length; - - let Some(size_alignment) = original_element_size_alignment else { - unsafe { drop_vec::(ptr, length, capacity) }; - return; - }; - - use NativeSizeAlignment as SA; - unsafe { - match size_alignment { - SA::S1A1 => drop_vec::(ptr, length, capacity), - SA::S2A2 => drop_vec::(ptr, length, capacity), - SA::S4A4 => drop_vec::(ptr, length, capacity), - SA::S8A4 => drop_vec::(ptr, length, capacity), - SA::S8A8 => drop_vec::(ptr, length, capacity), - SA::S12A4 => drop_vec::(ptr, length, capacity), - SA::S16A4 => drop_vec::(ptr, length, capacity), - SA::S16A8 => drop_vec::(ptr, length, capacity), - SA::S16A16 => drop_vec::(ptr, length, capacity), - SA::S32A16 => drop_vec::(ptr, length, capacity), - } - } + Some(BackingStorage::Vec { capacity }) => unsafe { + drop(Vec::from_raw_parts(self.ptr, self.length, capacity)) }, None => {}, } @@ -129,10 +66,7 @@ impl SharedStorage { ref_count: AtomicU64::new(1), ptr, length, - backing: Some(BackingStorage::Vec { - capacity, - original_element_size_alignment: None, - }), + backing: Some(BackingStorage::Vec { capacity }), phantom: PhantomData, }; Self { @@ -192,11 +126,7 @@ impl SharedStorage { } pub fn try_into_vec(mut self) -> Result, Self> { - let Some(BackingStorage::Vec { - capacity, - original_element_size_alignment: None, - }) = self.inner().backing - else { + let Some(BackingStorage::Vec { capacity }) = self.inner().backing else { return Err(self); }; if self.is_exclusive() { @@ -221,55 +151,6 @@ impl SharedStorage { } } -impl SharedStorage { - /// Create a [`SharedStorage`] from a [`Vec`] of [`AlignedBytes`]. - /// - /// This will fail if the size and alignment requirements of `T` are stricter than `B`. - pub fn from_aligned_bytes_vec(mut v: Vec) -> Option { - if align_of::() < align_of::() { - return None; - } - - // @NOTE: This is not a fundamental limitation, but something we impose for now. This makes - // calculating the capacity a lot easier. - if size_of::() < size_of::() || size_of::() % size_of::() != 0 { - return None; - } - - let scale_factor = size_of::() / size_of::(); - - let length = v.len() * scale_factor; - let capacity = v.capacity() * scale_factor; - let ptr = v.as_mut_ptr().cast::(); - core::mem::forget(v); - - let inner = SharedStorageInner { - ref_count: AtomicU64::new(1), - ptr, - length, - backing: Some(BackingStorage::Vec { - capacity, - original_element_size_alignment: Some(B::SIZE_ALIGNMENT_PAIR), - }), - phantom: PhantomData, - }; - - Some(Self { - inner: NonNull::new(Box::into_raw(Box::new(inner))).unwrap(), - phantom: PhantomData, - }) - } -} - -impl SharedStorage { - /// Create a [`SharedStorage`][SharedStorage] from a [`Vec`] of [`AlignedBytes`]. - /// - /// This will never fail since `u8` has unit size and alignment. - pub fn bytes_from_aligned_bytes_vec(v: Vec) -> Self { - Self::from_aligned_bytes_vec(v).unwrap() - } -} - impl Deref for SharedStorage { type Target = [T]; diff --git a/crates/polars-arrow/src/types/aligned_bytes.rs b/crates/polars-arrow/src/types/aligned_bytes.rs index 84329c2e5f8e..2c9bf9aed977 100644 --- a/crates/polars-arrow/src/types/aligned_bytes.rs +++ b/crates/polars-arrow/src/types/aligned_bytes.rs @@ -14,7 +14,6 @@ pub unsafe trait AlignedBytesCast: Pod {} pub trait AlignedBytes: Pod + Zeroable + Copy + Default + Eq { const ALIGNMENT: usize; const SIZE: usize; - const SIZE_ALIGNMENT_PAIR: NativeSizeAlignment; type Unaligned: AsRef<[u8]> + AsMut<[u8]> @@ -46,7 +45,7 @@ pub trait AlignedBytes: Pod + Zeroable + Copy + Default + Eq { macro_rules! impl_aligned_bytes { ( - $(($name:ident, $size:literal, $alignment:literal, $sap:ident, [$($eq_type:ty),*]),)+ + $(($name:ident, $size:literal, $alignment:literal, [$($eq_type:ty),*]),)+ ) => { $( /// Bytes with a size and alignment. @@ -60,7 +59,6 @@ macro_rules! impl_aligned_bytes { impl AlignedBytes for $name { const ALIGNMENT: usize = $alignment; const SIZE: usize = $size; - const SIZE_ALIGNMENT_PAIR: NativeSizeAlignment = NativeSizeAlignment::$sap; type Unaligned = [u8; $size]; @@ -100,54 +98,15 @@ macro_rules! impl_aligned_bytes { } } -/// A pair of size and alignment that is being used by a [`NativeType`][super::NativeType]. -#[derive(Clone, Copy)] -pub enum NativeSizeAlignment { - S1A1, - S2A2, - S4A4, - S8A4, - S8A8, - S12A4, - S16A4, - S16A8, - S16A16, - S32A16, -} - -impl NativeSizeAlignment { - pub const fn size(self) -> usize { - match self { - Self::S1A1 => 1, - Self::S2A2 => 2, - Self::S4A4 => 4, - Self::S8A4 | Self::S8A8 => 8, - Self::S12A4 => 12, - Self::S16A4 | Self::S16A8 | Self::S16A16 => 16, - Self::S32A16 => 32, - } - } - - pub const fn alignment(self) -> usize { - match self { - Self::S1A1 => 1, - Self::S2A2 => 2, - Self::S4A4 | Self::S8A4 | Self::S12A4 | Self::S16A4 => 4, - Self::S8A8 | Self::S16A8 => 8, - Self::S16A16 | Self::S32A16 => 16, - } - } -} - impl_aligned_bytes! { - (Bytes1Alignment1, 1, 1, S1A1, [u8, i8]), - (Bytes2Alignment2, 2, 2, S2A2, [u16, i16, f16]), - (Bytes4Alignment4, 4, 4, S4A4, [u32, i32, f32]), - (Bytes8Alignment8, 8, 8, S8A8, [u64, i64, f64]), - (Bytes8Alignment4, 8, 4, S8A4, [days_ms]), - (Bytes12Alignment4, 12, 4, S12A4, [[u32; 3]]), - (Bytes16Alignment4, 16, 4, S16A4, [View]), - (Bytes16Alignment8, 16, 8, S16A8, [months_days_ns]), - (Bytes16Alignment16, 16, 16, S16A16, [u128, i128]), - (Bytes32Alignment16, 32, 16, S32A16, [i256]), + (Bytes1Alignment1, 1, 1, [u8, i8]), + (Bytes2Alignment2, 2, 2, [u16, i16, f16]), + (Bytes4Alignment4, 4, 4, [u32, i32, f32]), + (Bytes8Alignment8, 8, 8, [u64, i64, f64]), + (Bytes8Alignment4, 8, 4, [days_ms]), + (Bytes12Alignment4, 12, 4, [[u32; 3]]), + (Bytes16Alignment4, 16, 4, [View]), + (Bytes16Alignment8, 16, 8, [months_days_ns]), + (Bytes16Alignment16, 16, 16, [u128, i128]), + (Bytes32Alignment16, 32, 16, [i256]), }