From 5a7342c3dde43c96a71bc27995030896342761f6 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Thu, 2 Feb 2023 20:58:22 -0800 Subject: [PATCH 1/4] Stop using `into_iter` in `array::map` --- library/core/src/array/drain.rs | 51 +++++++++++++++++++++++++++++++++ library/core/src/array/mod.rs | 34 +++++++++++++++------- library/core/tests/array.rs | 25 ++++++++++++++++ tests/codegen/array-map.rs | 48 +++++++++++++++++++++++++++++++ 4 files changed, 147 insertions(+), 11 deletions(-) create mode 100644 library/core/src/array/drain.rs create mode 100644 tests/codegen/array-map.rs diff --git a/library/core/src/array/drain.rs b/library/core/src/array/drain.rs new file mode 100644 index 0000000000000..5ca93d54f876f --- /dev/null +++ b/library/core/src/array/drain.rs @@ -0,0 +1,51 @@ +use crate::iter::TrustedLen; +use crate::mem::ManuallyDrop; +use crate::ptr::drop_in_place; +use crate::slice; + +// INVARIANT: It's ok to drop the remainder of the inner iterator. +pub(crate) struct Drain<'a, T>(slice::IterMut<'a, T>); + +pub(crate) fn drain_array_with( + array: [T; N], + func: impl for<'a> FnOnce(Drain<'a, T>) -> R, +) -> R { + let mut array = ManuallyDrop::new(array); + // SAFETY: Now that the local won't drop it, it's ok to construct the `Drain` which will. + let drain = Drain(array.iter_mut()); + func(drain) +} + +impl Drop for Drain<'_, T> { + fn drop(&mut self) { + // SAFETY: By the type invariant, we're allowed to drop all these. + unsafe { drop_in_place(self.0.as_mut_slice()) } + } +} + +impl Iterator for Drain<'_, T> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + let p: *const T = self.0.next()?; + // SAFETY: The iterator was already advanced, so we won't drop this later. + Some(unsafe { p.read() }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let n = self.len(); + (n, Some(n)) + } +} + +impl ExactSizeIterator for Drain<'_, T> { + #[inline] + fn len(&self) -> usize { + self.0.len() + } +} + +// SAFETY: This is a 1:1 wrapper for a slice iterator, which is also `TrustedLen`. +unsafe impl TrustedLen for Drain<'_, T> {} diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs index 2825e0bbb4385..ee340f385431d 100644 --- a/library/core/src/array/mod.rs +++ b/library/core/src/array/mod.rs @@ -17,9 +17,12 @@ use crate::ops::{ }; use crate::slice::{Iter, IterMut}; +mod drain; mod equality; mod iter; +pub(crate) use drain::drain_array_with; + #[stable(feature = "array_value_iter", since = "1.51.0")] pub use iter::IntoIter; @@ -513,9 +516,12 @@ impl [T; N] { where F: FnMut(T) -> U, { - // SAFETY: we know for certain that this iterator will yield exactly `N` - // items. - unsafe { collect_into_array_unchecked(&mut IntoIterator::into_iter(self).map(f)) } + drain_array_with(self, |iter| { + let mut iter = iter.map(f); + // SAFETY: we know for certain that this iterator will yield exactly `N` + // items. + unsafe { collect_into_array_unchecked(&mut iter) } + }) } /// A fallible function `f` applied to each element on array `self` in order to @@ -552,9 +558,12 @@ impl [T; N] { R: Try, R::Residual: Residual<[R::Output; N]>, { - // SAFETY: we know for certain that this iterator will yield exactly `N` - // items. - unsafe { try_collect_into_array_unchecked(&mut IntoIterator::into_iter(self).map(f)) } + drain_array_with(self, |iter| { + let mut iter = iter.map(f); + // SAFETY: we know for certain that this iterator will yield exactly `N` + // items. + unsafe { try_collect_into_array_unchecked(&mut iter) } + }) } /// 'Zips up' two arrays into a single array of pairs. @@ -575,11 +584,14 @@ impl [T; N] { /// ``` #[unstable(feature = "array_zip", issue = "80094")] pub fn zip(self, rhs: [U; N]) -> [(T, U); N] { - let mut iter = IntoIterator::into_iter(self).zip(rhs); - - // SAFETY: we know for certain that this iterator will yield exactly `N` - // items. - unsafe { collect_into_array_unchecked(&mut iter) } + drain_array_with(self, |lhs| { + drain_array_with(rhs, |rhs| { + let mut iter = crate::iter::zip(lhs, rhs); + // SAFETY: we know for certain that this iterator will yield exactly `N` + // items. + unsafe { collect_into_array_unchecked(&mut iter) } + }) + }) } /// Returns a slice containing the entire array. Equivalent to `&s[..]`. diff --git a/library/core/tests/array.rs b/library/core/tests/array.rs index f268fe3ae7ba8..5327e4f813925 100644 --- a/library/core/tests/array.rs +++ b/library/core/tests/array.rs @@ -700,3 +700,28 @@ fn array_into_iter_rfold() { let s = it.rfold(10, |a, b| 10 * a + b); assert_eq!(s, 10432); } + +#[cfg(not(panic = "abort"))] +#[test] +fn array_map_drops_unmapped_elements_on_panic() { + struct DropCounter<'a>(usize, &'a AtomicUsize); + impl Drop for DropCounter<'_> { + fn drop(&mut self) { + self.1.fetch_add(1, Ordering::SeqCst); + } + } + + const MAX: usize = 11; + for panic_after in 0..MAX { + let counter = AtomicUsize::new(0); + let a = array::from_fn::<_, 11, _>(|i| DropCounter(i, &counter)); + let success = std::panic::catch_unwind(|| { + let _ = a.map(|x| { + assert!(x.0 < panic_after); + assert_eq!(counter.load(Ordering::SeqCst), x.0); + }); + }); + assert!(success.is_err()); + assert_eq!(counter.load(Ordering::SeqCst), MAX); + } +} diff --git a/tests/codegen/array-map.rs b/tests/codegen/array-map.rs new file mode 100644 index 0000000000000..37585371a3222 --- /dev/null +++ b/tests/codegen/array-map.rs @@ -0,0 +1,48 @@ +// compile-flags: -C opt-level=3 -C target-cpu=x86-64-v3 -C llvm-args=-x86-asm-syntax=intel --emit=llvm-ir,asm +// no-system-llvm +// only-x86_64 +// ignore-debug (the extra assertions get in the way) + +#![crate_type = "lib"] +#![feature(array_zip)] + +// CHECK-LABEL: @short_integer_map +#[no_mangle] +pub fn short_integer_map(x: [u32; 8]) -> [u32; 8] { + // CHECK: load <8 x i32> + // CHECK: shl <8 x i32> + // CHECK: or <8 x i32> + // CHECK: store <8 x i32> + x.map(|x| 2 * x + 1) +} + +// CHECK-LABEL: @short_integer_zip_map +#[no_mangle] +pub fn short_integer_zip_map(x: [u32; 8], y: [u32; 8]) -> [u32; 8] { + // CHECK: %[[A:.+]] = load <8 x i32> + // CHECK: %[[B:.+]] = load <8 x i32> + // CHECK: sub <8 x i32> %[[A]], %[[B]] + // CHECK: store <8 x i32> + x.zip(y).map(|(x, y)| x - y) +} + +// This test is checking that LLVM can SRoA away a bunch of the overhead, +// like fully moving the iterators to registers. Notably, previous implementations +// of `map` ended up `alloca`ing the whole `array::IntoIterator`, meaning both a +// hard-to-eliminate `memcpy` and that the iteration counts needed to be written +// out to stack every iteration, even for infallible operations on `Copy` types. +// +// This is still imperfect, as there's more copies than would be ideal, +// but hopefully work like #103830 will improve that in future, +// and update this test to be stricter. +// +// CHECK-LABEL: @long_integer_map +#[no_mangle] +pub fn long_integer_map(x: [u32; 64]) -> [u32; 64] { + // CHECK: start: + // CHECK-NEXT: alloca [{{64|65}} x i32] + // CHECK-NEXT: alloca [{{64|65}} x i32] + // CHECK-NEXT: alloca %"core::mem::manually_drop::ManuallyDrop<[u32; 64]>" + // CHECK-NOT: alloca + x.map(|x| 2 * x + 1) +} From 52df0558ea349fa65036e61f0a647ea8072ec3f5 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Thu, 2 Feb 2023 22:15:23 -0800 Subject: [PATCH 2/4] Stop forcing `array::map` through an unnecessary `Result` --- library/core/src/array/mod.rs | 126 +++++++++++++++++++--------------- tests/codegen/array-map.rs | 5 +- 2 files changed, 71 insertions(+), 60 deletions(-) diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs index ee340f385431d..45ec68e6e7aa3 100644 --- a/library/core/src/array/mod.rs +++ b/library/core/src/array/mod.rs @@ -825,14 +825,13 @@ impl [T; N] { /// Pulls `N` items from `iter` and returns them as an array. If the iterator /// yields fewer than `N` items, this function exhibits undefined behavior. /// -/// See [`try_collect_into_array`] for more information. -/// -/// /// # Safety /// /// It is up to the caller to guarantee that `iter` yields at least `N` items. /// Violating this condition causes undefined behavior. -unsafe fn try_collect_into_array_unchecked(iter: &mut I) -> R::TryType +unsafe fn try_collect_into_array_unchecked( + iter: &mut I, +) -> ChangeOutputType where // Note: `TrustedLen` here is somewhat of an experiment. This is just an // internal function, so feel free to remove if this bound turns out to be a @@ -845,11 +844,21 @@ where debug_assert!(N <= iter.size_hint().1.unwrap_or(usize::MAX)); debug_assert!(N <= iter.size_hint().0); - // SAFETY: covered by the function contract. - unsafe { try_collect_into_array(iter).unwrap_unchecked() } + let mut array = MaybeUninit::uninit_array::(); + let cf = try_collect_into_array_erased(iter, &mut array); + match cf { + ControlFlow::Break(r) => FromResidual::from_residual(r), + ControlFlow::Continue(initialized) => { + debug_assert_eq!(initialized, N); + // SAFETY: because of our function contract, all the elements + // must have been initialized. + let output = unsafe { MaybeUninit::array_assume_init(array) }; + Try::from_output(output) + } + } } -// Infallible version of `try_collect_into_array_unchecked`. +/// Infallible version of [`try_collect_into_array_unchecked`]. unsafe fn collect_into_array_unchecked(iter: &mut I) -> [I::Item; N] where I: Iterator + TrustedLen, @@ -864,63 +873,48 @@ where } } -/// Pulls `N` items from `iter` and returns them as an array. If the iterator -/// yields fewer than `N` items, `Err` is returned containing an iterator over -/// the already yielded items. +/// Rather than *returning* the array, this fills in a passed-in buffer. +/// If any of the iterator elements short-circuit, it drops everything in the +/// buffer and return the error. Otherwise it returns the number of items +/// which were initialized in the buffer. /// -/// Since the iterator is passed as a mutable reference and this function calls -/// `next` at most `N` times, the iterator can still be used afterwards to -/// retrieve the remaining items. +/// (The caller is responsible for dropping those items on success, but not +/// doing that is just a leak, not UB, so this function is itself safe.) /// -/// If `iter.next()` panicks, all items already yielded by the iterator are -/// dropped. +/// This means less monomorphization, but more importantly it means that the +/// returned array doesn't need to be copied into the `Result`, since returning +/// the result seemed (2023-01) to cause in an extra `N + 1`-length `alloca` +/// even if it's always `unwrap_unchecked` later. #[inline] -fn try_collect_into_array( +fn try_collect_into_array_erased( iter: &mut I, -) -> Result> + buffer: &mut [MaybeUninit], +) -> ControlFlow where I: Iterator, I::Item: Try, - R: Residual<[T; N]>, { - if N == 0 { - // SAFETY: An empty array is always inhabited and has no validity invariants. - return Ok(Try::from_output(unsafe { mem::zeroed() })); - } - - let mut array = MaybeUninit::uninit_array::(); - let mut guard = Guard { array_mut: &mut array, initialized: 0 }; + let n = buffer.len(); + let mut guard = Guard { array_mut: buffer, initialized: 0 }; - for _ in 0..N { + for _ in 0..n { match iter.next() { Some(item_rslt) => { - let item = match item_rslt.branch() { - ControlFlow::Break(r) => { - return Ok(FromResidual::from_residual(r)); - } - ControlFlow::Continue(elem) => elem, - }; + let item = item_rslt.branch()?; // SAFETY: `guard.initialized` starts at 0, which means push can be called - // at most N times, which this loop does. + // at most `n` times, which this loop does. unsafe { guard.push_unchecked(item); } } - None => { - let alive = 0..guard.initialized; - mem::forget(guard); - // SAFETY: `array` was initialized with exactly `initialized` - // number of elements. - return Err(unsafe { IntoIter::new_unchecked(array, alive) }); - } + None => break, } } + let initialized = guard.initialized; mem::forget(guard); - // SAFETY: All elements of the array were populated in the loop above. - let output = unsafe { array.transpose().assume_init() }; - Ok(Try::from_output(output)) + ControlFlow::Continue(initialized) } /// Panic guard for incremental initialization of arrays. @@ -934,14 +928,14 @@ where /// /// To minimize indirection fields are still pub but callers should at least use /// `push_unchecked` to signal that something unsafe is going on. -pub(crate) struct Guard<'a, T, const N: usize> { +pub(crate) struct Guard<'a, T> { /// The array to be initialized. - pub array_mut: &'a mut [MaybeUninit; N], + pub array_mut: &'a mut [MaybeUninit], /// The number of items that have been initialized so far. pub initialized: usize, } -impl Guard<'_, T, N> { +impl Guard<'_, T> { /// Adds an item to the array and updates the initialized item counter. /// /// # Safety @@ -959,9 +953,9 @@ impl Guard<'_, T, N> { } } -impl Drop for Guard<'_, T, N> { +impl Drop for Guard<'_, T> { fn drop(&mut self) { - debug_assert!(self.initialized <= N); + debug_assert!(self.initialized <= self.array_mut.len()); // SAFETY: this slice will contain only initialized objects. unsafe { @@ -972,15 +966,33 @@ impl Drop for Guard<'_, T, N> { } } -/// Returns the next chunk of `N` items from the iterator or errors with an -/// iterator over the remainder. Used for `Iterator::next_chunk`. +/// Pulls `N` items from `iter` and returns them as an array. If the iterator +/// yields fewer than `N` items, `Err` is returned containing an iterator over +/// the already yielded items. +/// +/// Since the iterator is passed as a mutable reference and this function calls +/// `next` at most `N` times, the iterator can still be used afterwards to +/// retrieve the remaining items. +/// +/// If `iter.next()` panicks, all items already yielded by the iterator are +/// dropped. +/// +/// Used for [`Iterator::next_chunk`]. #[inline] -pub(crate) fn iter_next_chunk( - iter: &mut I, -) -> Result<[I::Item; N], IntoIter> -where - I: Iterator, -{ +pub(crate) fn iter_next_chunk( + iter: &mut impl Iterator, +) -> Result<[T; N], IntoIter> { let mut map = iter.map(NeverShortCircuit); - try_collect_into_array(&mut map).map(|NeverShortCircuit(arr)| arr) + let mut array = MaybeUninit::uninit_array::(); + let ControlFlow::Continue(initialized) = try_collect_into_array_erased(&mut map, &mut array); + if initialized == N { + // SAFETY: All elements of the array were populated. + let output = unsafe { MaybeUninit::array_assume_init(array) }; + Ok(output) + } else { + let alive = 0..initialized; + // SAFETY: `array` was initialized with exactly `initialized` + // number of elements. + return Err(unsafe { IntoIter::new_unchecked(array, alive) }); + } } diff --git a/tests/codegen/array-map.rs b/tests/codegen/array-map.rs index 37585371a3222..1154659eea541 100644 --- a/tests/codegen/array-map.rs +++ b/tests/codegen/array-map.rs @@ -1,4 +1,4 @@ -// compile-flags: -C opt-level=3 -C target-cpu=x86-64-v3 -C llvm-args=-x86-asm-syntax=intel --emit=llvm-ir,asm +// compile-flags: -C opt-level=3 -C target-cpu=x86-64-v3 // no-system-llvm // only-x86_64 // ignore-debug (the extra assertions get in the way) @@ -40,8 +40,7 @@ pub fn short_integer_zip_map(x: [u32; 8], y: [u32; 8]) -> [u32; 8] { #[no_mangle] pub fn long_integer_map(x: [u32; 64]) -> [u32; 64] { // CHECK: start: - // CHECK-NEXT: alloca [{{64|65}} x i32] - // CHECK-NEXT: alloca [{{64|65}} x i32] + // CHECK-NEXT: alloca [64 x i32] // CHECK-NEXT: alloca %"core::mem::manually_drop::ManuallyDrop<[u32; 64]>" // CHECK-NOT: alloca x.map(|x| 2 * x + 1) From 5bc328fdeff50b742a8136d0633924514d4d76b8 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Fri, 3 Feb 2023 03:27:51 -0800 Subject: [PATCH 3/4] Allow canonicalizing the `array::map` loop in trusted cases --- library/core/src/array/drain.rs | 33 ++- library/core/src/array/mod.rs | 224 ++++++++---------- .../core/src/iter/adapters/array_chunks.rs | 14 +- library/core/src/iter/adapters/cloned.rs | 15 +- library/core/src/iter/adapters/map.rs | 15 +- library/core/src/iter/adapters/zip.rs | 9 +- library/core/src/iter/mod.rs | 1 + library/core/src/iter/traits/mod.rs | 3 + .../src/iter/traits/unchecked_iterator.rs | 36 +++ library/core/src/ops/try_trait.rs | 9 + library/core/src/slice/iter.rs | 4 +- library/core/src/slice/iter/macros.rs | 9 + library/core/tests/iter/traits/iterator.rs | 3 + tests/codegen/array-map.rs | 4 +- 14 files changed, 237 insertions(+), 142 deletions(-) create mode 100644 library/core/src/iter/traits/unchecked_iterator.rs diff --git a/library/core/src/array/drain.rs b/library/core/src/array/drain.rs index 5ca93d54f876f..5fadf907b6219 100644 --- a/library/core/src/array/drain.rs +++ b/library/core/src/array/drain.rs @@ -1,11 +1,21 @@ -use crate::iter::TrustedLen; +use crate::iter::{TrustedLen, UncheckedIterator}; use crate::mem::ManuallyDrop; use crate::ptr::drop_in_place; use crate::slice; -// INVARIANT: It's ok to drop the remainder of the inner iterator. -pub(crate) struct Drain<'a, T>(slice::IterMut<'a, T>); - +/// A situationally-optimized version of `array.into_iter().for_each(func)`. +/// +/// [`crate::array::IntoIter`]s are great when you need an owned iterator, but +/// storing the entire array *inside* the iterator like that can sometimes +/// pessimize code. Notable, it can be more bytes than you really want to move +/// around, and because the array accesses index into it SRoA has a harder time +/// optimizing away the type than it does iterators that just hold a couple pointers. +/// +/// Thus this function exists, which gives a way to get *moved* access to the +/// elements of an array using a small iterator -- no bigger than a slice iterator. +/// +/// The function-taking-a-closure structure makes it safe, as it keeps callers +/// from looking at already-dropped elements. pub(crate) fn drain_array_with( array: [T; N], func: impl for<'a> FnOnce(Drain<'a, T>) -> R, @@ -16,6 +26,11 @@ pub(crate) fn drain_array_with( func(drain) } +/// See [`drain_array_with`] -- this is `pub(crate)` only so it's allowed to be +/// mentioned in the signature of that method. (Otherwise it hits `E0446`.) +// INVARIANT: It's ok to drop the remainder of the inner iterator. +pub(crate) struct Drain<'a, T>(slice::IterMut<'a, T>); + impl Drop for Drain<'_, T> { fn drop(&mut self) { // SAFETY: By the type invariant, we're allowed to drop all these. @@ -49,3 +64,13 @@ impl ExactSizeIterator for Drain<'_, T> { // SAFETY: This is a 1:1 wrapper for a slice iterator, which is also `TrustedLen`. unsafe impl TrustedLen for Drain<'_, T> {} + +impl UncheckedIterator for Drain<'_, T> { + unsafe fn next_unchecked(&mut self) -> T { + // SAFETY: `Drain` is 1:1 with the inner iterator, so if the caller promised + // that there's an element left, the inner iterator has one too. + let p: *const T = unsafe { self.0.next_unchecked() }; + // SAFETY: The iterator was already advanced, so we won't drop this later. + unsafe { p.read() } + } +} diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs index 45ec68e6e7aa3..ae9f6e70f43c6 100644 --- a/library/core/src/array/mod.rs +++ b/library/core/src/array/mod.rs @@ -10,7 +10,7 @@ use crate::convert::{Infallible, TryFrom}; use crate::error::Error; use crate::fmt; use crate::hash::{self, Hash}; -use crate::iter::TrustedLen; +use crate::iter::UncheckedIterator; use crate::mem::{self, MaybeUninit}; use crate::ops::{ ChangeOutputType, ControlFlow, FromResidual, Index, IndexMut, NeverShortCircuit, Residual, Try, @@ -55,16 +55,11 @@ pub use iter::IntoIter; /// ``` #[inline] #[stable(feature = "array_from_fn", since = "1.63.0")] -pub fn from_fn(mut cb: F) -> [T; N] +pub fn from_fn(cb: F) -> [T; N] where F: FnMut(usize) -> T, { - let mut idx = 0; - [(); N].map(|_| { - let res = cb(idx); - idx += 1; - res - }) + try_from_fn(NeverShortCircuit::wrap_mut_1(cb)).0 } /// Creates an array `[T; N]` where each fallible array element `T` is returned by the `cb` call. @@ -104,9 +99,14 @@ where R: Try, R::Residual: Residual<[R::Output; N]>, { - // SAFETY: we know for certain that this iterator will yield exactly `N` - // items. - unsafe { try_collect_into_array_unchecked(&mut (0..N).map(cb)) } + let mut array = MaybeUninit::uninit_array::(); + match try_from_fn_erased(&mut array, cb) { + ControlFlow::Break(r) => FromResidual::from_residual(r), + ControlFlow::Continue(()) => { + // SAFETY: All elements of the array were populated. + try { unsafe { MaybeUninit::array_assume_init(array) } } + } + } } /// Converts a reference to `T` into a reference to an array of length 1 (without copying). @@ -430,9 +430,7 @@ trait SpecArrayClone: Clone { impl SpecArrayClone for T { #[inline] default fn clone(array: &[T; N]) -> [T; N] { - // SAFETY: we know for certain that this iterator will yield exactly `N` - // items. - unsafe { collect_into_array_unchecked(&mut array.iter().cloned()) } + from_trusted_iterator(array.iter().cloned()) } } @@ -516,12 +514,7 @@ impl [T; N] { where F: FnMut(T) -> U, { - drain_array_with(self, |iter| { - let mut iter = iter.map(f); - // SAFETY: we know for certain that this iterator will yield exactly `N` - // items. - unsafe { collect_into_array_unchecked(&mut iter) } - }) + self.try_map(NeverShortCircuit::wrap_mut_1(f)).0 } /// A fallible function `f` applied to each element on array `self` in order to @@ -558,12 +551,7 @@ impl [T; N] { R: Try, R::Residual: Residual<[R::Output; N]>, { - drain_array_with(self, |iter| { - let mut iter = iter.map(f); - // SAFETY: we know for certain that this iterator will yield exactly `N` - // items. - unsafe { try_collect_into_array_unchecked(&mut iter) } - }) + drain_array_with(self, |iter| try_from_trusted_iterator(iter.map(f))) } /// 'Zips up' two arrays into a single array of pairs. @@ -585,12 +573,7 @@ impl [T; N] { #[unstable(feature = "array_zip", issue = "80094")] pub fn zip(self, rhs: [U; N]) -> [(T, U); N] { drain_array_with(self, |lhs| { - drain_array_with(rhs, |rhs| { - let mut iter = crate::iter::zip(lhs, rhs); - // SAFETY: we know for certain that this iterator will yield exactly `N` - // items. - unsafe { collect_into_array_unchecked(&mut iter) } - }) + drain_array_with(rhs, |rhs| from_trusted_iterator(crate::iter::zip(lhs, rhs))) }) } @@ -638,9 +621,7 @@ impl [T; N] { /// ``` #[unstable(feature = "array_methods", issue = "76118")] pub fn each_ref(&self) -> [&T; N] { - // SAFETY: we know for certain that this iterator will yield exactly `N` - // items. - unsafe { collect_into_array_unchecked(&mut self.iter()) } + from_trusted_iterator(self.iter()) } /// Borrows each element mutably and returns an array of mutable references @@ -660,9 +641,7 @@ impl [T; N] { /// ``` #[unstable(feature = "array_methods", issue = "76118")] pub fn each_mut(&mut self) -> [&mut T; N] { - // SAFETY: we know for certain that this iterator will yield exactly `N` - // items. - unsafe { collect_into_array_unchecked(&mut self.iter_mut()) } + from_trusted_iterator(self.iter_mut()) } /// Divides one array reference into two at an index. @@ -822,99 +801,71 @@ impl [T; N] { } } -/// Pulls `N` items from `iter` and returns them as an array. If the iterator -/// yields fewer than `N` items, this function exhibits undefined behavior. +/// Populate an array from the first `N` elements of `iter` /// -/// # Safety +/// # Panics /// -/// It is up to the caller to guarantee that `iter` yields at least `N` items. -/// Violating this condition causes undefined behavior. -unsafe fn try_collect_into_array_unchecked( - iter: &mut I, -) -> ChangeOutputType -where - // Note: `TrustedLen` here is somewhat of an experiment. This is just an - // internal function, so feel free to remove if this bound turns out to be a - // bad idea. In that case, remember to also remove the lower bound - // `debug_assert!` below! - I: Iterator + TrustedLen, - I::Item: Try, - R: Residual<[T; N]>, -{ - debug_assert!(N <= iter.size_hint().1.unwrap_or(usize::MAX)); - debug_assert!(N <= iter.size_hint().0); - - let mut array = MaybeUninit::uninit_array::(); - let cf = try_collect_into_array_erased(iter, &mut array); - match cf { - ControlFlow::Break(r) => FromResidual::from_residual(r), - ControlFlow::Continue(initialized) => { - debug_assert_eq!(initialized, N); - // SAFETY: because of our function contract, all the elements - // must have been initialized. - let output = unsafe { MaybeUninit::array_assume_init(array) }; - Try::from_output(output) - } - } +/// If the iterator doesn't actually have enough items. +/// +/// By depending on `TrustedLen`, however, we can do that check up-front (where +/// it easily optimizes away) so it doesn't impact the loop that fills the array. +#[inline] +fn from_trusted_iterator(iter: impl UncheckedIterator) -> [T; N] { + try_from_trusted_iterator(iter.map(NeverShortCircuit)).0 } -/// Infallible version of [`try_collect_into_array_unchecked`]. -unsafe fn collect_into_array_unchecked(iter: &mut I) -> [I::Item; N] +#[inline] +fn try_from_trusted_iterator( + iter: impl UncheckedIterator, +) -> ChangeOutputType where - I: Iterator + TrustedLen, + R: Try, + R::Residual: Residual<[T; N]>, { - let mut map = iter.map(NeverShortCircuit); - - // SAFETY: The same safety considerations w.r.t. the iterator length - // apply for `try_collect_into_array_unchecked` as for - // `collect_into_array_unchecked` - match unsafe { try_collect_into_array_unchecked(&mut map) } { - NeverShortCircuit(array) => array, + assert!(iter.size_hint().0 >= N); + fn next(mut iter: impl UncheckedIterator) -> impl FnMut(usize) -> T { + move |_| { + // SAFETY: We know that `from_fn` will call this at most N times, + // and we checked to ensure that we have at least that many items. + unsafe { iter.next_unchecked() } + } } + + try_from_fn(next(iter)) } -/// Rather than *returning* the array, this fills in a passed-in buffer. -/// If any of the iterator elements short-circuit, it drops everything in the -/// buffer and return the error. Otherwise it returns the number of items -/// which were initialized in the buffer. +/// Version of [`try_from_fn`] using a passed-in slice in order to avoid +/// needing to monomorphize for every array length. /// -/// (The caller is responsible for dropping those items on success, but not -/// doing that is just a leak, not UB, so this function is itself safe.) +/// This takes a generator rather than an iterator so that *at the type level* +/// it never needs to worry about running out of items. When combined with +/// an infallible `Try` type, that means the loop canonicalizes easily, allowing +/// it to optimize well. /// -/// This means less monomorphization, but more importantly it means that the -/// returned array doesn't need to be copied into the `Result`, since returning -/// the result seemed (2023-01) to cause in an extra `N + 1`-length `alloca` -/// even if it's always `unwrap_unchecked` later. +/// It would be *possible* to unify this and [`iter_next_chunk_erased`] into one +/// function that does the union of both things, but last time it was that way +/// it resulted in poor codegen from the "are there enough source items?" checks +/// not optimizing away. So if you give it a shot, make sure to watch what +/// happens in the codegen tests. #[inline] -fn try_collect_into_array_erased( - iter: &mut I, +fn try_from_fn_erased( buffer: &mut [MaybeUninit], -) -> ControlFlow + mut generator: impl FnMut(usize) -> R, +) -> ControlFlow where - I: Iterator, - I::Item: Try, + R: Try, { - let n = buffer.len(); let mut guard = Guard { array_mut: buffer, initialized: 0 }; - for _ in 0..n { - match iter.next() { - Some(item_rslt) => { - let item = item_rslt.branch()?; + while guard.initialized < guard.array_mut.len() { + let item = generator(guard.initialized).branch()?; - // SAFETY: `guard.initialized` starts at 0, which means push can be called - // at most `n` times, which this loop does. - unsafe { - guard.push_unchecked(item); - } - } - None => break, - } + // SAFETY: The loop condition ensures we have space to push the item + unsafe { guard.push_unchecked(item) }; } - let initialized = guard.initialized; mem::forget(guard); - ControlFlow::Continue(initialized) + ControlFlow::Continue(()) } /// Panic guard for incremental initialization of arrays. @@ -928,7 +879,7 @@ where /// /// To minimize indirection fields are still pub but callers should at least use /// `push_unchecked` to signal that something unsafe is going on. -pub(crate) struct Guard<'a, T> { +struct Guard<'a, T> { /// The array to be initialized. pub array_mut: &'a mut [MaybeUninit], /// The number of items that have been initialized so far. @@ -960,7 +911,7 @@ impl Drop for Guard<'_, T> { // SAFETY: this slice will contain only initialized objects. unsafe { crate::ptr::drop_in_place(MaybeUninit::slice_assume_init_mut( - &mut self.array_mut.get_unchecked_mut(..self.initialized), + self.array_mut.get_unchecked_mut(..self.initialized), )); } } @@ -982,17 +933,44 @@ impl Drop for Guard<'_, T> { pub(crate) fn iter_next_chunk( iter: &mut impl Iterator, ) -> Result<[T; N], IntoIter> { - let mut map = iter.map(NeverShortCircuit); let mut array = MaybeUninit::uninit_array::(); - let ControlFlow::Continue(initialized) = try_collect_into_array_erased(&mut map, &mut array); - if initialized == N { - // SAFETY: All elements of the array were populated. - let output = unsafe { MaybeUninit::array_assume_init(array) }; - Ok(output) - } else { - let alive = 0..initialized; - // SAFETY: `array` was initialized with exactly `initialized` - // number of elements. - return Err(unsafe { IntoIter::new_unchecked(array, alive) }); + let r = iter_next_chunk_erased(&mut array, iter); + match r { + Ok(()) => { + // SAFETY: All elements of `array` were populated. + Ok(unsafe { MaybeUninit::array_assume_init(array) }) + } + Err(initialized) => { + // SAFETY: Only the first `initialized` elements were populated + Err(unsafe { IntoIter::new_unchecked(array, 0..initialized) }) + } + } +} + +/// Version of [`iter_next_chunk`] using a passed-in slice in order to avoid +/// needing to monomorphize for every array length. +/// +/// Unfortunately this loop has two exit conditions, the buffer filling up +/// or the iterator running out of items, making it tend to optimize poorly. +#[inline] +fn iter_next_chunk_erased( + buffer: &mut [MaybeUninit], + iter: &mut impl Iterator, +) -> Result<(), usize> { + let mut guard = Guard { array_mut: buffer, initialized: 0 }; + while guard.initialized < guard.array_mut.len() { + let Some(item) = iter.next() else { + // Unlike `try_from_fn_erased`, we want to keep the partial results, + // so we need to defuse the guard instead of using `?`. + let initialized = guard.initialized; + mem::forget(guard); + return Err(initialized) + }; + + // SAFETY: The loop condition ensures we have space to push the item + unsafe { guard.push_unchecked(item) }; } + + mem::forget(guard); + Ok(()) } diff --git a/library/core/src/iter/adapters/array_chunks.rs b/library/core/src/iter/adapters/array_chunks.rs index af786609757b1..13719c727e93f 100644 --- a/library/core/src/iter/adapters/array_chunks.rs +++ b/library/core/src/iter/adapters/array_chunks.rs @@ -1,6 +1,5 @@ use crate::array; use crate::iter::{ByRefSized, FusedIterator, Iterator, TrustedRandomAccessNoCoerce}; -use crate::mem::{self, MaybeUninit}; use crate::ops::{ControlFlow, NeverShortCircuit, Try}; /// An iterator over `N` elements of the iterator at a time. @@ -212,19 +211,14 @@ where let mut i = 0; // Use a while loop because (0..len).step_by(N) doesn't optimize well. while inner_len - i >= N { - let mut chunk = MaybeUninit::uninit_array(); - let mut guard = array::Guard { array_mut: &mut chunk, initialized: 0 }; - while guard.initialized < N { + let chunk = crate::array::from_fn(|local| { // SAFETY: The method consumes the iterator and the loop condition ensures that // all accesses are in bounds and only happen once. unsafe { - let idx = i + guard.initialized; - guard.push_unchecked(self.iter.__iterator_get_unchecked(idx)); + let idx = i + local; + self.iter.__iterator_get_unchecked(idx) } - } - mem::forget(guard); - // SAFETY: The loop above initialized all elements - let chunk = unsafe { MaybeUninit::array_assume_init(chunk) }; + }); accum = f(accum, chunk); i += N; } diff --git a/library/core/src/iter/adapters/cloned.rs b/library/core/src/iter/adapters/cloned.rs index aba24a79dcf79..914ff86c1a959 100644 --- a/library/core/src/iter/adapters/cloned.rs +++ b/library/core/src/iter/adapters/cloned.rs @@ -1,7 +1,7 @@ use crate::iter::adapters::{ zip::try_get_unchecked, TrustedRandomAccess, TrustedRandomAccessNoCoerce, }; -use crate::iter::{FusedIterator, TrustedLen}; +use crate::iter::{FusedIterator, TrustedLen, UncheckedIterator}; use crate::ops::Try; /// An iterator that clones the elements of an underlying iterator. @@ -140,3 +140,16 @@ where T: Clone, { } + +impl<'a, I, T: 'a> UncheckedIterator for Cloned +where + I: UncheckedIterator, + T: Clone, +{ + unsafe fn next_unchecked(&mut self) -> T { + // SAFETY: `Cloned` is 1:1 with the inner iterator, so if the caller promised + // that there's an element left, the inner iterator has one too. + let item = unsafe { self.it.next_unchecked() }; + item.clone() + } +} diff --git a/library/core/src/iter/adapters/map.rs b/library/core/src/iter/adapters/map.rs index 9e25dbe462c91..31d02a4da6ea5 100644 --- a/library/core/src/iter/adapters/map.rs +++ b/library/core/src/iter/adapters/map.rs @@ -2,7 +2,7 @@ use crate::fmt; use crate::iter::adapters::{ zip::try_get_unchecked, SourceIter, TrustedRandomAccess, TrustedRandomAccessNoCoerce, }; -use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen}; +use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen, UncheckedIterator}; use crate::ops::Try; /// An iterator that maps the values of `iter` with `f`. @@ -187,6 +187,19 @@ where { } +impl UncheckedIterator for Map +where + I: UncheckedIterator, + F: FnMut(I::Item) -> B, +{ + unsafe fn next_unchecked(&mut self) -> B { + // SAFETY: `Map` is 1:1 with the inner iterator, so if the caller promised + // that there's an element left, the inner iterator has one too. + let item = unsafe { self.iter.next_unchecked() }; + (self.f)(item) + } +} + #[doc(hidden)] #[unstable(feature = "trusted_random_access", issue = "none")] unsafe impl TrustedRandomAccess for Map where I: TrustedRandomAccess {} diff --git a/library/core/src/iter/adapters/zip.rs b/library/core/src/iter/adapters/zip.rs index 8153c8cfef133..b6b0c90cb7d14 100644 --- a/library/core/src/iter/adapters/zip.rs +++ b/library/core/src/iter/adapters/zip.rs @@ -1,7 +1,7 @@ use crate::cmp; use crate::fmt::{self, Debug}; use crate::iter::{DoubleEndedIterator, ExactSizeIterator, FusedIterator, Iterator}; -use crate::iter::{InPlaceIterable, SourceIter, TrustedLen}; +use crate::iter::{InPlaceIterable, SourceIter, TrustedLen, UncheckedIterator}; /// An iterator that iterates two other iterators simultaneously. /// @@ -417,6 +417,13 @@ where { } +impl UncheckedIterator for Zip +where + A: UncheckedIterator, + B: UncheckedIterator, +{ +} + // Arbitrarily selects the left side of the zip iteration as extractable "source" // it would require negative trait bounds to be able to try both #[unstable(issue = "none", feature = "inplace_iteration")] diff --git a/library/core/src/iter/mod.rs b/library/core/src/iter/mod.rs index 00f57fbcc6162..156b925de773f 100644 --- a/library/core/src/iter/mod.rs +++ b/library/core/src/iter/mod.rs @@ -450,6 +450,7 @@ pub use self::adapters::{ pub use self::adapters::{Intersperse, IntersperseWith}; pub(crate) use self::adapters::try_process; +pub(crate) use self::traits::UncheckedIterator; mod adapters; mod range; diff --git a/library/core/src/iter/traits/mod.rs b/library/core/src/iter/traits/mod.rs index ed0fb634dbf05..41ea29e6a84d9 100644 --- a/library/core/src/iter/traits/mod.rs +++ b/library/core/src/iter/traits/mod.rs @@ -4,6 +4,7 @@ mod double_ended; mod exact_size; mod iterator; mod marker; +mod unchecked_iterator; #[stable(feature = "rust1", since = "1.0.0")] pub use self::{ @@ -19,3 +20,5 @@ pub use self::{ pub use self::marker::InPlaceIterable; #[unstable(feature = "trusted_step", issue = "85731")] pub use self::marker::TrustedStep; + +pub(crate) use self::unchecked_iterator::UncheckedIterator; diff --git a/library/core/src/iter/traits/unchecked_iterator.rs b/library/core/src/iter/traits/unchecked_iterator.rs new file mode 100644 index 0000000000000..ae4bfcad4e68f --- /dev/null +++ b/library/core/src/iter/traits/unchecked_iterator.rs @@ -0,0 +1,36 @@ +use crate::iter::TrustedLen; + +/// [`TrustedLen`] cannot have methods, so this allows augmenting it. +/// +/// It currently requires `TrustedLen` because it's unclear whether it's +/// reasonably possible to depend on the `size_hint` of anything else. +pub(crate) trait UncheckedIterator: TrustedLen { + /// Gets the next item from a non-empty iterator. + /// + /// Because there's always a value to return, that means it can return + /// the `Item` type directly, without wrapping it in an `Option`. + /// + /// # Safety + /// + /// This can only be called if `size_hint().0 != 0`, guaranteeing that + /// there's at least one item available. + /// + /// Otherwise (aka when `size_hint().1 == Some(0)`), this is UB. + /// + /// # Note to Implementers + /// + /// This has a default implementation using [`Option::unwrap_unchecked`]. + /// That's probably sufficient if your `next` *always* returns `Some`, + /// such as for infinite iterators. In more complicated situations, however, + /// sometimes there can still be `insertvalue`/`assume`/`extractvalue` + /// instructions remaining in the IR from the `Option` handling, at which + /// point you might want to implement this manually instead. + #[unstable(feature = "trusted_len_next_unchecked", issue = "37572")] + #[inline] + unsafe fn next_unchecked(&mut self) -> Self::Item { + let opt = self.next(); + // SAFETY: The caller promised that we're not empty, and + // `Self: TrustedLen` so we can actually trust the `size_hint`. + unsafe { opt.unwrap_unchecked() } + } +} diff --git a/library/core/src/ops/try_trait.rs b/library/core/src/ops/try_trait.rs index 9108fc6304525..86aa1e4fd20ba 100644 --- a/library/core/src/ops/try_trait.rs +++ b/library/core/src/ops/try_trait.rs @@ -379,6 +379,15 @@ pub(crate) type ChangeOutputType = <::Residual as Residual>:: pub(crate) struct NeverShortCircuit(pub T); impl NeverShortCircuit { + /// Wraps a unary function to produce one that wraps the output into a `NeverShortCircuit`. + /// + /// This is useful for implementing infallible functions in terms of the `try_` ones, + /// without accidentally capturing extra generic parameters in a closure. + #[inline] + pub fn wrap_mut_1(mut f: impl FnMut(A) -> T) -> impl FnMut(A) -> NeverShortCircuit { + move |a| NeverShortCircuit(f(a)) + } + #[inline] pub fn wrap_mut_2( mut f: impl ~const FnMut(A, B) -> T, diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs index 90ab43d1289f0..c4317799bcc68 100644 --- a/library/core/src/slice/iter.rs +++ b/library/core/src/slice/iter.rs @@ -7,7 +7,9 @@ use crate::cmp; use crate::cmp::Ordering; use crate::fmt; use crate::intrinsics::assume; -use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce}; +use crate::iter::{ + FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce, UncheckedIterator, +}; use crate::marker::{PhantomData, Send, Sized, Sync}; use crate::mem::{self, SizedTypeProperties}; use crate::num::NonZeroUsize; diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs index 0fd57b197aa97..89b92a7d5975f 100644 --- a/library/core/src/slice/iter/macros.rs +++ b/library/core/src/slice/iter/macros.rs @@ -384,6 +384,15 @@ macro_rules! iterator { #[unstable(feature = "trusted_len", issue = "37572")] unsafe impl TrustedLen for $name<'_, T> {} + + impl<'a, T> UncheckedIterator for $name<'a, T> { + unsafe fn next_unchecked(&mut self) -> $elem { + // SAFETY: The caller promised there's at least one more item. + unsafe { + next_unchecked!(self) + } + } + } } } diff --git a/library/core/tests/iter/traits/iterator.rs b/library/core/tests/iter/traits/iterator.rs index 37345c1d38142..62566a9502d04 100644 --- a/library/core/tests/iter/traits/iterator.rs +++ b/library/core/tests/iter/traits/iterator.rs @@ -582,6 +582,9 @@ fn test_next_chunk() { assert_eq!(it.next_chunk().unwrap(), []); assert_eq!(it.next_chunk().unwrap(), [4, 5, 6, 7, 8, 9]); assert_eq!(it.next_chunk::<4>().unwrap_err().as_slice(), &[10, 11]); + + let mut it = std::iter::repeat_with(|| panic!()); + assert_eq!(it.next_chunk::<0>().unwrap(), []); } // just tests by whether or not this compiles diff --git a/tests/codegen/array-map.rs b/tests/codegen/array-map.rs index 1154659eea541..9298e89e397d0 100644 --- a/tests/codegen/array-map.rs +++ b/tests/codegen/array-map.rs @@ -43,5 +43,7 @@ pub fn long_integer_map(x: [u32; 64]) -> [u32; 64] { // CHECK-NEXT: alloca [64 x i32] // CHECK-NEXT: alloca %"core::mem::manually_drop::ManuallyDrop<[u32; 64]>" // CHECK-NOT: alloca - x.map(|x| 2 * x + 1) + // CHECK: mul <{{[0-9]+}} x i32> + // CHECK: add <{{[0-9]+}} x i32> + x.map(|x| 13 * x + 7) } From bb77860d9ccdc6a920edeedce313446545294c04 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Sat, 4 Feb 2023 16:33:37 -0800 Subject: [PATCH 4/4] Add another autovectorization codegen test using array zip-map --- tests/codegen/autovectorize-f32x4.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/codegen/autovectorize-f32x4.rs b/tests/codegen/autovectorize-f32x4.rs index 6b09c8fc99860..9ecea53f1c05c 100644 --- a/tests/codegen/autovectorize-f32x4.rs +++ b/tests/codegen/autovectorize-f32x4.rs @@ -1,6 +1,7 @@ -// compile-flags: -C opt-level=3 +// compile-flags: -C opt-level=3 -Z merge-functions=disabled // only-x86_64 #![crate_type = "lib"] +#![feature(array_zip)] // CHECK-LABEL: @auto_vectorize_direct #[no_mangle] @@ -30,3 +31,13 @@ pub fn auto_vectorize_loop(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { } c } + +// CHECK-LABEL: @auto_vectorize_array_zip_map +#[no_mangle] +pub fn auto_vectorize_array_zip_map(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { +// CHECK: load <4 x float> +// CHECK: load <4 x float> +// CHECK: fadd <4 x float> +// CHECK: store <4 x float> + a.zip(b).map(|(a, b)| a + b) +}