Skip to content

Commit

Permalink
move transmuting to separate PR
Browse files Browse the repository at this point in the history
  • Loading branch information
orlp committed Oct 21, 2024
1 parent eea2b96 commit 44f0690
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 177 deletions.
2 changes: 1 addition & 1 deletion crates/polars-arrow/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl<T> Buffer<T> {
}

/// Auxiliary method to create a new Buffer
pub fn from_storage(storage: SharedStorage<T>) -> Self {
pub(crate) fn from_storage(storage: SharedStorage<T>) -> Self {
let ptr = storage.as_ptr();
let length = storage.len();
Buffer {
Expand Down
129 changes: 5 additions & 124 deletions crates/polars-arrow/src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,10 @@ use std::ops::Deref;
use std::ptr::NonNull;
use std::sync::atomic::{AtomicU64, Ordering};

use bytemuck::Pod;

use crate::ffi::InternalArrowArray;
use crate::types::{
AlignedBytes, Bytes12Alignment4, Bytes16Alignment16, Bytes16Alignment4, Bytes16Alignment8,
Bytes1Alignment1, Bytes2Alignment2, Bytes32Alignment16, Bytes4Alignment4, Bytes8Alignment4,
Bytes8Alignment8, NativeSizeAlignment,
};

enum BackingStorage {
Vec {
capacity: usize,

/// Size and alignment of the original vector type.
///
/// We have the following invariants:
/// - if this is Some(...) then all alignments involved are a power of 2
/// - align_of(Original) >= align_of(Current)
/// - size_of(Original) >= size_of(Current)
/// - size_of(Original) % size_of(Current) == 0
original_element_size_alignment: Option<NativeSizeAlignment>,
},
Vec { capacity: usize },
InternalArrowArray(InternalArrowArray),
}

Expand All @@ -42,53 +24,8 @@ impl<T> Drop for SharedStorageInner<T> {
fn drop(&mut self) {
match self.backing.take() {
Some(BackingStorage::InternalArrowArray(a)) => drop(a),
Some(BackingStorage::Vec {
capacity,
original_element_size_alignment,
}) => {
#[inline]
unsafe fn drop_vec<T, O>(ptr: *mut T, length: usize, capacity: usize) {
let ptr = ptr.cast::<O>();
debug_assert!(ptr.is_aligned());

debug_assert!(size_of::<O>() >= size_of::<T>());
debug_assert_eq!(size_of::<O>() % size_of::<T>(), 0);

let scale_factor = size_of::<O>() / size_of::<T>();

// If the original element had a different size_of we need to rescale the
// length and capacity here.
let length = length / scale_factor;
let capacity = capacity / scale_factor;

// SAFETY:
// - The BackingStorage holds invariants that make this safe
drop(unsafe { Vec::from_raw_parts(ptr, length, capacity) });
}

let ptr = self.ptr;
let length = self.length;

let Some(size_alignment) = original_element_size_alignment else {
unsafe { drop_vec::<T, T>(ptr, length, capacity) };
return;
};

use NativeSizeAlignment as SA;
unsafe {
match size_alignment {
SA::S1A1 => drop_vec::<T, Bytes1Alignment1>(ptr, length, capacity),
SA::S2A2 => drop_vec::<T, Bytes2Alignment2>(ptr, length, capacity),
SA::S4A4 => drop_vec::<T, Bytes4Alignment4>(ptr, length, capacity),
SA::S8A4 => drop_vec::<T, Bytes8Alignment4>(ptr, length, capacity),
SA::S8A8 => drop_vec::<T, Bytes8Alignment8>(ptr, length, capacity),
SA::S12A4 => drop_vec::<T, Bytes12Alignment4>(ptr, length, capacity),
SA::S16A4 => drop_vec::<T, Bytes16Alignment4>(ptr, length, capacity),
SA::S16A8 => drop_vec::<T, Bytes16Alignment8>(ptr, length, capacity),
SA::S16A16 => drop_vec::<T, Bytes16Alignment16>(ptr, length, capacity),
SA::S32A16 => drop_vec::<T, Bytes32Alignment16>(ptr, length, capacity),
}
}
Some(BackingStorage::Vec { capacity }) => unsafe {
drop(Vec::from_raw_parts(self.ptr, self.length, capacity))
},
None => {},
}
Expand Down Expand Up @@ -129,10 +66,7 @@ impl<T> SharedStorage<T> {
ref_count: AtomicU64::new(1),
ptr,
length,
backing: Some(BackingStorage::Vec {
capacity,
original_element_size_alignment: None,
}),
backing: Some(BackingStorage::Vec { capacity }),
phantom: PhantomData,
};
Self {
Expand Down Expand Up @@ -192,11 +126,7 @@ impl<T> SharedStorage<T> {
}

pub fn try_into_vec(mut self) -> Result<Vec<T>, Self> {
let Some(BackingStorage::Vec {
capacity,
original_element_size_alignment: None,
}) = self.inner().backing
else {
let Some(BackingStorage::Vec { capacity }) = self.inner().backing else {
return Err(self);
};
if self.is_exclusive() {
Expand All @@ -221,55 +151,6 @@ impl<T> SharedStorage<T> {
}
}

impl<T: Pod> SharedStorage<T> {
    /// Create a [`SharedStorage`] from a [`Vec`] of [`AlignedBytes`].
    ///
    /// The allocation of `v` is taken over without copying: its length and capacity are rescaled
    /// from units of `B` to units of `T`, and the size/alignment pair of `B` is recorded in the
    /// backing storage alongside the rescaled capacity.
    ///
    /// Returns `None` (dropping `v`) if any of the following holds:
    /// - the alignment of `B` is smaller than the alignment of `T`,
    /// - `T` is zero-sized,
    /// - the size of `B` is smaller than the size of `T`, or not a multiple of it.
    pub fn from_aligned_bytes_vec<B: AlignedBytes>(mut v: Vec<B>) -> Option<Self> {
        if align_of::<B>() < align_of::<T>() {
            return None;
        }

        // A zero-sized `T` would make the `%` and `/` below divide by zero and panic instead of
        // reporting failure through the `Option`, so reject it up front.
        if size_of::<T>() == 0 {
            return None;
        }

        // @NOTE: This is not a fundamental limitation, but something we impose for now. This makes
        // calculating the capacity a lot easier.
        if size_of::<B>() < size_of::<T>() || size_of::<B>() % size_of::<T>() != 0 {
            return None;
        }

        // Number of `T` elements that fit exactly in one `B` element.
        let scale_factor = size_of::<B>() / size_of::<T>();

        let length = v.len() * scale_factor;
        let capacity = v.capacity() * scale_factor;
        let ptr = v.as_mut_ptr().cast::<T>();
        // Ownership of the allocation moves into the storage below; `v` must not free it.
        core::mem::forget(v);

        let inner = SharedStorageInner {
            ref_count: AtomicU64::new(1),
            ptr,
            length,
            backing: Some(BackingStorage::Vec {
                capacity,
                original_element_size_alignment: Some(B::SIZE_ALIGNMENT_PAIR),
            }),
            phantom: PhantomData,
        };

        Some(Self {
            inner: NonNull::new(Box::into_raw(Box::new(inner))).unwrap(),
            phantom: PhantomData,
        })
    }
}

impl SharedStorage<u8> {
    /// Build a byte-level [`SharedStorage<u8>`][SharedStorage] out of a [`Vec`] of
    /// [`AlignedBytes`].
    ///
    /// This forwards to `from_aligned_bytes_vec` and unwraps its result. Since `u8` has unit
    /// size and alignment, that conversion is expected to always succeed.
    pub fn bytes_from_aligned_bytes_vec<B: AlignedBytes>(v: Vec<B>) -> Self {
        let converted: Option<Self> = Self::from_aligned_bytes_vec::<B>(v);
        converted.unwrap()
    }
}

impl<T> Deref for SharedStorage<T> {
type Target = [T];

Expand Down
63 changes: 11 additions & 52 deletions crates/polars-arrow/src/types/aligned_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ pub unsafe trait AlignedBytesCast<B: AlignedBytes>: Pod {}
pub trait AlignedBytes: Pod + Zeroable + Copy + Default + Eq {
const ALIGNMENT: usize;
const SIZE: usize;
const SIZE_ALIGNMENT_PAIR: NativeSizeAlignment;

type Unaligned: AsRef<[u8]>
+ AsMut<[u8]>
Expand Down Expand Up @@ -46,7 +45,7 @@ pub trait AlignedBytes: Pod + Zeroable + Copy + Default + Eq {

macro_rules! impl_aligned_bytes {
(
$(($name:ident, $size:literal, $alignment:literal, $sap:ident, [$($eq_type:ty),*]),)+
$(($name:ident, $size:literal, $alignment:literal, [$($eq_type:ty),*]),)+
) => {
$(
/// Bytes with a size and alignment.
Expand All @@ -60,7 +59,6 @@ macro_rules! impl_aligned_bytes {
impl AlignedBytes for $name {
const ALIGNMENT: usize = $alignment;
const SIZE: usize = $size;
const SIZE_ALIGNMENT_PAIR: NativeSizeAlignment = NativeSizeAlignment::$sap;

type Unaligned = [u8; $size];

Expand Down Expand Up @@ -100,54 +98,15 @@ macro_rules! impl_aligned_bytes {
}
}

/// A pair of size and alignment that is being used by a [`NativeType`][super::NativeType].
///
/// Every variant is named `S<size>A<alignment>`, with both values given in bytes; use
/// [`size`][Self::size] and [`alignment`][Self::alignment] to read them back as numbers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NativeSizeAlignment {
    S1A1,
    S2A2,
    S4A4,
    S8A4,
    S8A8,
    S12A4,
    S16A4,
    S16A8,
    S16A16,
    S32A16,
}

impl NativeSizeAlignment {
    /// Returns the size, in bytes, encoded by this variant.
    pub const fn size(self) -> usize {
        match self {
            Self::S1A1 => 1,
            Self::S2A2 => 2,
            Self::S4A4 => 4,
            Self::S8A4 | Self::S8A8 => 8,
            Self::S12A4 => 12,
            Self::S16A4 | Self::S16A8 | Self::S16A16 => 16,
            Self::S32A16 => 32,
        }
    }

    /// Returns the alignment, in bytes, encoded by this variant.
    pub const fn alignment(self) -> usize {
        match self {
            Self::S1A1 => 1,
            Self::S2A2 => 2,
            Self::S4A4 | Self::S8A4 | Self::S12A4 | Self::S16A4 => 4,
            Self::S8A8 | Self::S16A8 => 8,
            Self::S16A16 | Self::S32A16 => 16,
        }
    }
}

impl_aligned_bytes! {
(Bytes1Alignment1, 1, 1, S1A1, [u8, i8]),
(Bytes2Alignment2, 2, 2, S2A2, [u16, i16, f16]),
(Bytes4Alignment4, 4, 4, S4A4, [u32, i32, f32]),
(Bytes8Alignment8, 8, 8, S8A8, [u64, i64, f64]),
(Bytes8Alignment4, 8, 4, S8A4, [days_ms]),
(Bytes12Alignment4, 12, 4, S12A4, [[u32; 3]]),
(Bytes16Alignment4, 16, 4, S16A4, [View]),
(Bytes16Alignment8, 16, 8, S16A8, [months_days_ns]),
(Bytes16Alignment16, 16, 16, S16A16, [u128, i128]),
(Bytes32Alignment16, 32, 16, S32A16, [i256]),
(Bytes1Alignment1, 1, 1, [u8, i8]),
(Bytes2Alignment2, 2, 2, [u16, i16, f16]),
(Bytes4Alignment4, 4, 4, [u32, i32, f32]),
(Bytes8Alignment8, 8, 8, [u64, i64, f64]),
(Bytes8Alignment4, 8, 4, [days_ms]),
(Bytes12Alignment4, 12, 4, [[u32; 3]]),
(Bytes16Alignment4, 16, 4, [View]),
(Bytes16Alignment8, 16, 8, [months_days_ns]),
(Bytes16Alignment16, 16, 16, [u128, i128]),
(Bytes32Alignment16, 32, 16, [i256]),
}

0 comments on commit 44f0690

Please sign in to comment.