From d738edc452b2c2a1d1d5b649c43ea490198bb2ac Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 25 Feb 2024 09:14:51 -0800 Subject: [PATCH 1/8] Enforce strict style consistency in pgrx::array --- pgrx/src/array.rs | 59 +++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/pgrx/src/array.rs b/pgrx/src/array.rs index cf99dc555..438ce9103 100644 --- a/pgrx/src/array.rs +++ b/pgrx/src/array.rs @@ -9,12 +9,12 @@ //LICENSE Use of this source code is governed by the MIT license that can be found in the LICENSE file. #![allow(clippy::precedence)] use crate::datum::Array; -use crate::pg_sys; use crate::toast::{Toast, Toasty}; +use crate::{layout, pg_sys, varlena}; use bitvec::prelude::*; -use bitvec::ptr::{bitslice_from_raw_parts_mut, BitPtr, BitPtrError, Mut}; -use core::ptr::{slice_from_raw_parts_mut, NonNull}; -use core::slice; +use bitvec::ptr::{self as bitptr, BitPtr, BitPtrError, Mut}; +use core::ptr::{self, NonNull}; +use core::{mem, slice}; #[allow(non_snake_case)] #[inline(always)] @@ -38,10 +38,8 @@ const fn MAXALIGN(len: usize) -> usize { unsafe fn ARR_NDIM(a: *mut pg_sys::ArrayType) -> usize { // #define ARR_NDIM(a) ((a)->ndim) - unsafe { - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - (*a).ndim as usize - } + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer + unsafe { (*a).ndim as usize } } /// # Safety @@ -51,10 +49,8 @@ unsafe fn ARR_NDIM(a: *mut pg_sys::ArrayType) -> usize { unsafe fn ARR_HASNULL(a: *mut pg_sys::ArrayType) -> bool { // #define ARR_HASNULL(a) ((a)->dataoffset != 0) - unsafe { - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - (*a).dataoffset != 0 - } + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer + unsafe { (*a).dataoffset != 0 } } /// # Safety @@ -68,10 +64,8 @@ const unsafe fn ARR_DIMS(a: *mut pg_sys::ArrayType) -> *mut i32 { // #define ARR_DIMS(a) \ // ((int *) (((char *) (a)) + sizeof(ArrayType))) - unsafe { - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - a.cast::().add(std::mem::size_of::()).cast::() - } + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer + unsafe { a.cast::().add(mem::size_of::()).cast::() } } /// # Safety @@ -80,10 +74,8 @@ const unsafe fn ARR_DIMS(a: *mut pg_sys::ArrayType) -> *mut i32 { #[allow(non_snake_case)] #[inline(always)] unsafe fn ARR_NELEMS(a: *mut pg_sys::ArrayType) -> usize { - unsafe { - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - pg_sys::ArrayGetNItems((*a).ndim, ARR_DIMS(a)) as usize - } + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer + unsafe { pg_sys::ArrayGetNItems((*a).ndim, ARR_DIMS(a)) as usize } } /// Returns the "null bitmap" of the specified array. If there isn't one (the array contains no nulls) @@ -104,15 +96,13 @@ unsafe fn ARR_NULLBITMAP(a: *mut pg_sys::ArrayType) -> *mut pg_sys::bits8 { // : (bits8 *) NULL) // + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer unsafe { - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer if ARR_HASNULL(a) { - a.cast::().add( - std::mem::size_of::() - + 2 * std::mem::size_of::() * ARR_NDIM(a), - ) + a.cast::() + .add(mem::size_of::() + 2 * mem::size_of::() * ARR_NDIM(a)) } else { - std::ptr::null_mut() + ptr::null_mut() } } } @@ -125,7 +115,7 @@ const fn ARR_OVERHEAD_NONULLS(ndims: usize) -> usize { // #define ARR_OVERHEAD_NONULLS(ndims) \ // MAXALIGN(sizeof(ArrayType) + 2 * sizeof(int) * (ndims)) - MAXALIGN(std::mem::size_of::() + 2 * std::mem::size_of::() * ndims) + MAXALIGN(mem::size_of::() + 2 * mem::size_of::() * ndims) } /// # Safety @@ -137,8 +127,8 @@ unsafe fn ARR_DATA_OFFSET(a: *mut pg_sys::ArrayType) -> usize { // #define ARR_DATA_OFFSET(a) \ // (ARR_HASNULL(a) ? (a)->dataoffset : ARR_OVERHEAD_NONULLS(ARR_NDIM(a))) + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer unsafe { - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer if ARR_HASNULL(a) { (*a).dataoffset as _ } else { @@ -253,7 +243,7 @@ impl RawArray { #[allow(dead_code)] pub(crate) unsafe fn deconstruct( &mut self, - layout: crate::layout::Layout, + layout: layout::Layout, ) -> (*mut pg_sys::Datum, *mut bool) { let oid = self.oid(); let array = self.ptr.as_ptr(); @@ -268,7 +258,7 @@ impl RawArray { array, oid, layout.size.as_typlen().into(), - matches!(layout.pass, crate::layout::PassBy::Value), + matches!(layout.pass, layout::PassBy::Value), layout.align.as_typalign(), &mut elements, &mut nulls, @@ -412,7 +402,7 @@ impl RawArray { This is because, while the initial pointer is NonNull, ARR_NULLBITMAP can return a nullptr! */ - NonNull::new(slice_from_raw_parts_mut(self.nulls_mut_ptr(), len)) + NonNull::new(ptr::slice_from_raw_parts_mut(self.nulls_mut_ptr(), len)) } /** @@ -435,8 +425,7 @@ impl RawArray { This is because, while the initial pointer is NonNull, ARR_NULLBITMAP can return a nullptr! */ - - NonNull::new(bitslice_from_raw_parts_mut(self.nulls_bitptr()?, self.len)) + NonNull::new(bitptr::bitslice_from_raw_parts_mut(self.nulls_bitptr()?, self.len)) } /** @@ -503,7 +492,7 @@ impl RawArray { needing only their initial assertion regarding the type being correct. */ unsafe { - NonNull::new_unchecked(slice_from_raw_parts_mut( + NonNull::new_unchecked(ptr::slice_from_raw_parts_mut( ARR_DATA_PTR(self.ptr.as_ptr()).cast(), self.len, )) @@ -518,7 +507,7 @@ impl RawArray { /// "one past the end" pointer for the entire array's bytes pub(crate) fn end_ptr(&self) -> *const u8 { let ptr = self.ptr.as_ptr().cast::(); - ptr.wrapping_add(unsafe { crate::varlena::varsize_any(ptr.cast()) }) + ptr.wrapping_add(unsafe { varlena::varsize_any(ptr.cast()) }) } } From ad64c85fa6dd0ad1eb2535509be827b2b88f37d3 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 25 Feb 2024 09:20:53 -0800 Subject: [PATCH 2/8] Remove redundant SAFETY comments These are redundant with the public information documented on the fn, and are "safety" comments within safe functions. --- pgrx/src/array.rs | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pgrx/src/array.rs b/pgrx/src/array.rs index 438ce9103..1d58ddce3 100644 --- a/pgrx/src/array.rs +++ b/pgrx/src/array.rs @@ -395,13 +395,6 @@ impl RawArray { pub fn nulls(&mut self) -> Option> { let len = self.len + 7 >> 3; // Obtains 0 if len was 0. - /* - SAFETY: This obtains the nulls pointer, which is valid to obtain because - the len was asserted on construction. However, unlike the other cases, - it isn't correct to trust it. Instead, this gets null-checked. - This is because, while the initial pointer is NonNull, - ARR_NULLBITMAP can return a nullptr! - */ NonNull::new(ptr::slice_from_raw_parts_mut(self.nulls_mut_ptr(), len)) } @@ -418,13 +411,6 @@ impl RawArray { [ARR_NULLBITMAP]: */ pub fn nulls_bitslice(&mut self) -> Option>> { - /* - SAFETY: This obtains the nulls pointer, which is valid to obtain because - the len was asserted on construction. However, unlike the other cases, - it isn't correct to trust it. Instead, this gets null-checked. - This is because, while the initial pointer is NonNull, - ARR_NULLBITMAP can return a nullptr! - */ NonNull::new(bitptr::bitslice_from_raw_parts_mut(self.nulls_bitptr()?, self.len)) } From 30b18342d7819de7c3a8377f198a73c97579de54 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 25 Feb 2024 09:42:46 -0800 Subject: [PATCH 3/8] Modularize pgrx/src/array/port.rs --- pgrx/src/array.rs | 152 +++-------------------------------------- pgrx/src/array/port.rs | 133 ++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+), 144 deletions(-) create mode 100644 pgrx/src/array/port.rs diff --git a/pgrx/src/array.rs b/pgrx/src/array.rs index 1d58ddce3..93434ad27 100644 --- a/pgrx/src/array.rs +++ b/pgrx/src/array.rs @@ -14,145 +14,9 @@ use crate::{layout, pg_sys, varlena}; use bitvec::prelude::*; use bitvec::ptr::{self as bitptr, BitPtr, BitPtrError, Mut}; use core::ptr::{self, NonNull}; -use core::{mem, slice}; - -#[allow(non_snake_case)] -#[inline(always)] -const fn TYPEALIGN(alignval: usize, len: usize) -> usize { - // #define TYPEALIGN(ALIGNVAL,LEN) \ - // (((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) & ~((uintptr_t) ((ALIGNVAL) - 1))) - (len + (alignval - 1)) & !(alignval - 1) -} - -#[allow(non_snake_case)] -#[inline(always)] -const fn MAXALIGN(len: usize) -> usize { - // #define MAXALIGN(LEN) TYPEALIGN(MAXIMUM_ALIGNOF, (LEN)) - TYPEALIGN(pg_sys::MAXIMUM_ALIGNOF as _, len) -} - -/// # Safety -/// Does a field access, but doesn't deref out of bounds of ArrayType -#[allow(non_snake_case)] -#[inline(always)] -unsafe fn ARR_NDIM(a: *mut pg_sys::ArrayType) -> usize { - // #define ARR_NDIM(a) ((a)->ndim) - - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - unsafe { (*a).ndim as usize } -} - -/// # Safety -/// Does a field access, but doesn't deref out of bounds of ArrayType -#[allow(non_snake_case)] -#[inline(always)] -unsafe fn ARR_HASNULL(a: *mut pg_sys::ArrayType) -> bool { - // #define ARR_HASNULL(a) ((a)->dataoffset != 0) - - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - unsafe { (*a).dataoffset != 0 } -} - -/// # Safety -/// Does a field access, but doesn't deref out of bounds of ArrayType -/// -/// [`pg_sys::ArrayType`] is typically allocated past its size, and its somewhere in that region -/// that the returned pointer points, so don't attempt to `pfree` it. -#[allow(non_snake_case)] -#[inline(always)] -const unsafe fn ARR_DIMS(a: *mut pg_sys::ArrayType) -> *mut i32 { - // #define ARR_DIMS(a) \ - // ((int *) (((char *) (a)) + sizeof(ArrayType))) - - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - unsafe { a.cast::().add(mem::size_of::()).cast::() } -} - -/// # Safety -/// Does a field access and deref but not out of bounds of ArrayType. The caller asserts that -/// `a` is a properly allocated [`pg_sys::ArrayType`] -#[allow(non_snake_case)] -#[inline(always)] -unsafe fn ARR_NELEMS(a: *mut pg_sys::ArrayType) -> usize { - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - unsafe { pg_sys::ArrayGetNItems((*a).ndim, ARR_DIMS(a)) as usize } -} +use core::slice; -/// Returns the "null bitmap" of the specified array. If there isn't one (the array contains no nulls) -/// then the null pointer is returned. -/// -/// # Safety -/// Does a field access, but doesn't deref out of bounds of ArrayType. The caller asserts that -/// `a` is a properly allocated [`pg_sys::ArrayType`] -/// -/// [`pg_sys::ArrayType`] is typically allocated past its size, and its somewhere in that region -/// that the returned pointer points, so don't attempt to `pfree` it. -#[allow(non_snake_case)] -#[inline(always)] -unsafe fn ARR_NULLBITMAP(a: *mut pg_sys::ArrayType) -> *mut pg_sys::bits8 { - // #define ARR_NULLBITMAP(a) \ - // (ARR_HASNULL(a) ? \ - // (bits8 *) (((char *) (a)) + sizeof(ArrayType) + 2 * sizeof(int) * ARR_NDIM(a)) \ - // : (bits8 *) NULL) - // - - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - unsafe { - if ARR_HASNULL(a) { - a.cast::() - .add(mem::size_of::() + 2 * mem::size_of::() * ARR_NDIM(a)) - } else { - ptr::null_mut() - } - } -} - -/// The total array header size (in bytes) for an array with the specified -/// number of dimensions and total number of items. -#[allow(non_snake_case)] -#[inline(always)] -const fn ARR_OVERHEAD_NONULLS(ndims: usize) -> usize { - // #define ARR_OVERHEAD_NONULLS(ndims) \ - // MAXALIGN(sizeof(ArrayType) + 2 * sizeof(int) * (ndims)) - - MAXALIGN(mem::size_of::() + 2 * mem::size_of::() * ndims) -} - -/// # Safety -/// Does a field access, but doesn't deref out of bounds of ArrayType. The caller asserts that -/// `a` is a properly allocated [`pg_sys::ArrayType`] -#[allow(non_snake_case)] -#[inline(always)] -unsafe fn ARR_DATA_OFFSET(a: *mut pg_sys::ArrayType) -> usize { - // #define ARR_DATA_OFFSET(a) \ - // (ARR_HASNULL(a) ? (a)->dataoffset : ARR_OVERHEAD_NONULLS(ARR_NDIM(a))) - - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - unsafe { - if ARR_HASNULL(a) { - (*a).dataoffset as _ - } else { - ARR_OVERHEAD_NONULLS(ARR_NDIM(a)) - } - } -} - -/// Returns a pointer to the actual array data. -/// -/// # Safety -/// Does a field access, but doesn't deref out of bounds of ArrayType. The caller asserts that -/// `a` is a properly allocated [`pg_sys::ArrayType`] -/// -/// [`pg_sys::ArrayType`] is typically allocated past its size, and its somewhere in that region -/// that the returned pointer points, so don't attempt to `pfree` it. -#[allow(non_snake_case)] -#[inline(always)] -unsafe fn ARR_DATA_PTR(a: *mut pg_sys::ArrayType) -> *mut u8 { - // #define ARR_DATA_PTR(a) \ - // (((char *) (a)) + ARR_DATA_OFFSET(a)) - - unsafe { a.cast::().add(ARR_DATA_OFFSET(a)) } -} +mod port; /** An aligned, dereferenceable `NonNull` with low-level accessors. @@ -224,7 +88,7 @@ impl RawArray { */ pub unsafe fn from_ptr(ptr: NonNull) -> RawArray { // SAFETY: Validity asserted by the caller. - let len = unsafe { ARR_NELEMS(ptr.as_ptr()) } as usize; + let len = unsafe { port::ARR_NELEMS(ptr.as_ptr()) } as usize; RawArray { ptr, len } } @@ -275,7 +139,7 @@ impl RawArray { pub unsafe fn from_array(arr: Array) -> Option { let array_type = arr.into_array_type() as *mut _; // SAFETY: Validity asserted by the caller. - let len = unsafe { ARR_NELEMS(array_type) } as usize; + let len = unsafe { port::ARR_NELEMS(array_type) } as usize; Some(RawArray { ptr: NonNull::new(array_type)?, len }) } @@ -322,7 +186,7 @@ impl RawArray { */ unsafe { let ndim = self.ndim() as usize; - slice::from_raw_parts(ARR_DIMS(self.ptr.as_ptr()), ndim) + slice::from_raw_parts(port::ARR_DIMS(self.ptr.as_ptr()), ndim) } } @@ -366,7 +230,7 @@ impl RawArray { fn nulls_mut_ptr(&mut self) -> *mut u8 { // SAFETY: This isn't public for a reason: it's a maybe-null *mut BitSlice, which is easy to misuse. // Obtaining it, however, is perfectly safe. - unsafe { ARR_NULLBITMAP(self.ptr.as_ptr()) } + unsafe { port::ARR_NULLBITMAP(self.ptr.as_ptr()) } } #[inline] @@ -479,7 +343,7 @@ impl RawArray { */ unsafe { NonNull::new_unchecked(ptr::slice_from_raw_parts_mut( - ARR_DATA_PTR(self.ptr.as_ptr()).cast(), + port::ARR_DATA_PTR(self.ptr.as_ptr()).cast(), self.len, )) } @@ -487,7 +351,7 @@ impl RawArray { #[inline] pub(crate) fn data_ptr(&self) -> *const u8 { - unsafe { ARR_DATA_PTR(self.ptr.as_ptr()) } + unsafe { port::ARR_DATA_PTR(self.ptr.as_ptr()) } } /// "one past the end" pointer for the entire array's bytes diff --git a/pgrx/src/array/port.rs b/pgrx/src/array/port.rs new file mode 100644 index 000000000..8c3414bfe --- /dev/null +++ b/pgrx/src/array/port.rs @@ -0,0 +1,133 @@ +/*! Ported array macros and functions. +*/ +#![allow(non_snake_case)] +use crate::pg_sys; +use core::{mem, ptr}; + +#[inline(always)] +pub(super) const fn TYPEALIGN(alignval: usize, len: usize) -> usize { + // #define TYPEALIGN(ALIGNVAL,LEN) \ + // (((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) & ~((uintptr_t) ((ALIGNVAL) - 1))) + (len + (alignval - 1)) & !(alignval - 1) +} + +#[inline(always)] +pub(super) const fn MAXALIGN(len: usize) -> usize { + // #define MAXALIGN(LEN) TYPEALIGN(MAXIMUM_ALIGNOF, (LEN)) + TYPEALIGN(pg_sys::MAXIMUM_ALIGNOF as _, len) +} + +/// # Safety +/// Does a field access, but doesn't deref out of bounds of ArrayType +#[inline(always)] +pub(super) unsafe fn ARR_NDIM(a: *mut pg_sys::ArrayType) -> usize { + // #define ARR_NDIM(a) ((a)->ndim) + + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer + unsafe { (*a).ndim as usize } +} + +/// # Safety +/// Does a field access, but doesn't deref out of bounds of ArrayType +#[inline(always)] +pub(super) unsafe fn ARR_HASNULL(a: *mut pg_sys::ArrayType) -> bool { + // #define ARR_HASNULL(a) ((a)->dataoffset != 0) + + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer + unsafe { (*a).dataoffset != 0 } +} + +/// # Safety +/// Does a field access, but doesn't deref out of bounds of ArrayType +/// +/// [`pg_sys::ArrayType`] is typically allocated past its size, and its somewhere in that region +/// that the returned pointer points, so don't attempt to `pfree` it. +#[inline(always)] +pub(super) const unsafe fn ARR_DIMS(a: *mut pg_sys::ArrayType) -> *mut i32 { + // #define ARR_DIMS(a) \ + // ((int *) (((char *) (a)) + sizeof(ArrayType))) + + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer + unsafe { a.cast::().add(mem::size_of::()).cast::() } +} + +/// # Safety +/// Does a field access and deref but not out of bounds of ArrayType. The caller asserts that +/// `a` is a properly allocated [`pg_sys::ArrayType`] +#[inline(always)] +pub(super) unsafe fn ARR_NELEMS(a: *mut pg_sys::ArrayType) -> usize { + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer + unsafe { pg_sys::ArrayGetNItems((*a).ndim, ARR_DIMS(a)) as usize } +} + +/// Returns the "null bitmap" of the specified array. If there isn't one (the array contains no nulls) +/// then the null pointer is returned. +/// +/// # Safety +/// Does a field access, but doesn't deref out of bounds of ArrayType. The caller asserts that +/// `a` is a properly allocated [`pg_sys::ArrayType`] +/// +/// [`pg_sys::ArrayType`] is typically allocated past its size, and its somewhere in that region +/// that the returned pointer points, so don't attempt to `pfree` it. +#[inline(always)] +pub(super) unsafe fn ARR_NULLBITMAP(a: *mut pg_sys::ArrayType) -> *mut pg_sys::bits8 { + // #define ARR_NULLBITMAP(a) \ + // (ARR_HASNULL(a) ? \ + // (bits8 *) (((char *) (a)) + sizeof(ArrayType) + 2 * sizeof(int) * ARR_NDIM(a)) \ + // : (bits8 *) NULL) + // + + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer + unsafe { + if ARR_HASNULL(a) { + a.cast::() + .add(mem::size_of::() + 2 * mem::size_of::() * ARR_NDIM(a)) + } else { + ptr::null_mut() + } + } +} + +/// The total array header size (in bytes) for an array with the specified +/// number of dimensions and total number of items. +#[inline(always)] +pub(super) const fn ARR_OVERHEAD_NONULLS(ndims: usize) -> usize { + // #define ARR_OVERHEAD_NONULLS(ndims) \ + // MAXALIGN(sizeof(ArrayType) + 2 * sizeof(int) * (ndims)) + + MAXALIGN(mem::size_of::() + 2 * mem::size_of::() * ndims) +} + +/// # Safety +/// Does a field access, but doesn't deref out of bounds of ArrayType. The caller asserts that +/// `a` is a properly allocated [`pg_sys::ArrayType`] +#[inline(always)] +pub(super) unsafe fn ARR_DATA_OFFSET(a: *mut pg_sys::ArrayType) -> usize { + // #define ARR_DATA_OFFSET(a) \ + // (ARR_HASNULL(a) ? (a)->dataoffset : ARR_OVERHEAD_NONULLS(ARR_NDIM(a))) + + // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer + unsafe { + if ARR_HASNULL(a) { + (*a).dataoffset as _ + } else { + ARR_OVERHEAD_NONULLS(ARR_NDIM(a)) + } + } +} + +/// Returns a pointer to the actual array data. +/// +/// # Safety +/// Does a field access, but doesn't deref out of bounds of ArrayType. The caller asserts that +/// `a` is a properly allocated [`pg_sys::ArrayType`] +/// +/// [`pg_sys::ArrayType`] is typically allocated past its size, and its somewhere in that region +/// that the returned pointer points, so don't attempt to `pfree` it. +#[inline(always)] +pub(super) unsafe fn ARR_DATA_PTR(a: *mut pg_sys::ArrayType) -> *mut u8 { + // #define ARR_DATA_PTR(a) \ + // (((char *) (a)) + ARR_DATA_OFFSET(a)) + + unsafe { a.cast::().add(ARR_DATA_OFFSET(a)) } +} From c6e4785e46d5fb2e4e9025a0f9515f24e748fb5a Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 25 Feb 2024 21:41:14 -0800 Subject: [PATCH 4/8] Port pg_sys::ArrayGetNItems into RawArray::len This allows us to perform the calculation entirely in Rust, so there is now no more FFI overhead for creating a RawArray. This will become more useful when it is being used more vigorously as part of the implementation of arrays. --- pgrx/src/array.rs | 23 ++++++++++++----------- pgrx/src/array/port.rs | 9 --------- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/pgrx/src/array.rs b/pgrx/src/array.rs index 93434ad27..4faceb779 100644 --- a/pgrx/src/array.rs +++ b/pgrx/src/array.rs @@ -61,7 +61,6 @@ aligned varlena header, which will cause undefined behavior if it is interacted #[derive(Debug)] pub struct RawArray { ptr: NonNull, - len: usize, } #[deny(unsafe_op_in_unsafe_fn)] @@ -87,9 +86,7 @@ impl RawArray { [the std documentation]: core::ptr#safety */ pub unsafe fn from_ptr(ptr: NonNull) -> RawArray { - // SAFETY: Validity asserted by the caller. - let len = unsafe { port::ARR_NELEMS(ptr.as_ptr()) } as usize; - RawArray { ptr, len } + RawArray { ptr } } pub(crate) unsafe fn detoast_from_varlena(stale: NonNull) -> Toast { @@ -138,9 +135,7 @@ impl RawArray { /// or a null value, as-if [RawArray::from_ptr]. pub unsafe fn from_array(arr: Array) -> Option { let array_type = arr.into_array_type() as *mut _; - // SAFETY: Validity asserted by the caller. - let len = unsafe { port::ARR_NELEMS(array_type) } as usize; - Some(RawArray { ptr: NonNull::new(array_type)?, len }) + Some(RawArray { ptr: NonNull::new(array_type)? }) } /// Returns the inner raw pointer to the ArrayType. @@ -194,7 +189,13 @@ impl RawArray { /// Includes all items, even the ones that might be null. #[inline] pub fn len(&self) -> usize { - self.len + // Calculating the product with i32 mirrors the Postgres implementation, + // except we can use checked_mul instead of trying to cast to 64 bits and + // hoping that doesn't also overflow on multiplication. + self.dims() + .into_iter() + .fold(Some(1i32), |prod, &d| prod.and_then(|m| m.checked_mul(d))) + .unwrap() as usize } /// Accessor for ArrayType's elemtype. @@ -257,7 +258,7 @@ impl RawArray { [ARR_NULLBITMAP]: */ pub fn nulls(&mut self) -> Option> { - let len = self.len + 7 >> 3; // Obtains 0 if len was 0. + let len = self.len() + 7 >> 3; // Obtains 0 if len was 0. NonNull::new(ptr::slice_from_raw_parts_mut(self.nulls_mut_ptr(), len)) } @@ -275,7 +276,7 @@ impl RawArray { [ARR_NULLBITMAP]: */ pub fn nulls_bitslice(&mut self) -> Option>> { - NonNull::new(bitptr::bitslice_from_raw_parts_mut(self.nulls_bitptr()?, self.len)) + NonNull::new(bitptr::bitslice_from_raw_parts_mut(self.nulls_bitptr()?, self.len())) } /** @@ -344,7 +345,7 @@ impl RawArray { unsafe { NonNull::new_unchecked(ptr::slice_from_raw_parts_mut( port::ARR_DATA_PTR(self.ptr.as_ptr()).cast(), - self.len, + self.len(), )) } } diff --git a/pgrx/src/array/port.rs b/pgrx/src/array/port.rs index 8c3414bfe..ac2082ee3 100644 --- a/pgrx/src/array/port.rs +++ b/pgrx/src/array/port.rs @@ -51,15 +51,6 @@ pub(super) const unsafe fn ARR_DIMS(a: *mut pg_sys::ArrayType) -> *mut i32 { unsafe { a.cast::().add(mem::size_of::()).cast::() } } -/// # Safety -/// Does a field access and deref but not out of bounds of ArrayType. The caller asserts that -/// `a` is a properly allocated [`pg_sys::ArrayType`] -#[inline(always)] -pub(super) unsafe fn ARR_NELEMS(a: *mut pg_sys::ArrayType) -> usize { - // SAFETY: caller has asserted that `a` is a properly allocated ArrayType pointer - unsafe { pg_sys::ArrayGetNItems((*a).ndim, ARR_DIMS(a)) as usize } -} - /// Returns the "null bitmap" of the specified array. If there isn't one (the array contains no nulls) /// then the null pointer is returned. /// From 551905f6178fdaf6bd20a9046d2c09d3d0fe75fe Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 25 Feb 2024 23:17:55 -0800 Subject: [PATCH 5/8] Fix handling 0 dims --- pgrx/src/array.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pgrx/src/array.rs b/pgrx/src/array.rs index 4faceb779..8baca3324 100644 --- a/pgrx/src/array.rs +++ b/pgrx/src/array.rs @@ -192,10 +192,14 @@ impl RawArray { // Calculating the product with i32 mirrors the Postgres implementation, // except we can use checked_mul instead of trying to cast to 64 bits and // hoping that doesn't also overflow on multiplication. - self.dims() - .into_iter() - .fold(Some(1i32), |prod, &d| prod.and_then(|m| m.checked_mul(d))) - .unwrap() as usize + let dims = self.dims(); + if dims.len() == 0 { + 0 + } else { + dims.into_iter() + .fold(Some(1i32), |prod, &d| prod.and_then(|m| m.checked_mul(d))) + .expect("Product of array dimensions should be <= i32::MAX") as usize + } } /// Accessor for ArrayType's elemtype. From 90db55c7b253e14d755b46fa46e448839c1cde47 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sun, 25 Feb 2024 23:20:26 -0800 Subject: [PATCH 6/8] Note panic risk --- pgrx/src/array.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pgrx/src/array.rs b/pgrx/src/array.rs index 8baca3324..4e7f5e6f1 100644 --- a/pgrx/src/array.rs +++ b/pgrx/src/array.rs @@ -187,6 +187,9 @@ impl RawArray { /// The flattened length of the array over every single element. /// Includes all items, even the ones that might be null. + /// + /// # Panics + /// Panics if the Array's dimensions, multiplied together, exceed sizes Postgres can handle. #[inline] pub fn len(&self) -> usize { // Calculating the product with i32 mirrors the Postgres implementation, From 6b609c67d3151b70fc587e971680917a7da33229 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 27 Feb 2024 13:44:23 -0800 Subject: [PATCH 7/8] Link relevant header --- pgrx/src/array/port.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pgrx/src/array/port.rs b/pgrx/src/array/port.rs index ac2082ee3..25ca90e7b 100644 --- a/pgrx/src/array/port.rs +++ b/pgrx/src/array/port.rs @@ -27,8 +27,12 @@ pub(super) unsafe fn ARR_NDIM(a: *mut pg_sys::ArrayType) -> usize { unsafe { (*a).ndim as usize } } +/// True if [the array *may* have nulls][array.h] +/// /// # Safety /// Does a field access, but doesn't deref out of bounds of ArrayType +/// +/// [array.h]: https://github.com/postgres/postgres/blob/c4bd6ff57c9a7b188cbd93855755f1029d7a5662/src/include/utils/array.h#L9 #[inline(always)] pub(super) unsafe fn ARR_HASNULL(a: *mut pg_sys::ArrayType) -> bool { // #define ARR_HASNULL(a) ((a)->dataoffset != 0) From 8a73801992fce0324e82708c31108e66a812c0e0 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 27 Feb 2024 14:19:59 -0800 Subject: [PATCH 8/8] Check array `len < MAX_ARRAY_SIZE` --- pgrx/src/array.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pgrx/src/array.rs b/pgrx/src/array.rs index 4e7f5e6f1..1b288021c 100644 --- a/pgrx/src/array.rs +++ b/pgrx/src/array.rs @@ -192,16 +192,21 @@ impl RawArray { /// Panics if the Array's dimensions, multiplied together, exceed sizes Postgres can handle. #[inline] pub fn len(&self) -> usize { - // Calculating the product with i32 mirrors the Postgres implementation, + // Calculating the product mostly mirrors the Postgres implementation, // except we can use checked_mul instead of trying to cast to 64 bits and // hoping that doesn't also overflow on multiplication. + // Also integer promotion doesn't real, so bitcast negatives. let dims = self.dims(); if dims.len() == 0 { 0 } else { + // bindgen whiffs MaxArraySize AND MaxAllocSize! + const MAX_ARRAY_SIZE: u32 = 0x3fffffff / 8; dims.into_iter() - .fold(Some(1i32), |prod, &d| prod.and_then(|m| m.checked_mul(d))) - .expect("Product of array dimensions should be <= i32::MAX") as usize + .map(|i| *i as u32) // treat negatives as huge + .fold(Some(1u32), |prod, d| prod.and_then(|m| m.checked_mul(d))) + .filter(|prod| prod <= &MAX_ARRAY_SIZE) + .expect("product of array dimensions must be < 2.pow(27)") as usize } }