From 38fd630eb912ef046bf1e761ef75cf6d5a01d5f8 Mon Sep 17 00:00:00 2001
From: overlookmotel <theoverlookmotel@gmail.com>
Date: Tue, 24 Sep 2024 13:25:27 +0100
Subject: [PATCH] perf(transformer): introduce `NonEmptyStack`

---
 Cargo.lock                                    |   1 +
 crates/oxc_transformer/Cargo.toml             |   1 +
 .../src/helpers/stack/capacity.rs             | 143 ++++
 .../oxc_transformer/src/helpers/stack/mod.rs  |   4 +
 .../src/helpers/stack/non_empty.rs            | 616 ++++++++++++++++++
 .../src/helpers/stack/sparse.rs               |  46 +-
 6 files changed, 774 insertions(+), 37 deletions(-)
 create mode 100644 crates/oxc_transformer/src/helpers/stack/capacity.rs
 create mode 100644 crates/oxc_transformer/src/helpers/stack/non_empty.rs
diff --git a/Cargo.lock b/Cargo.lock
index 54a9b1394f5358..7c876b6ab3b0b3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1971,6 +1971,7 @@ dependencies = [
 name = "oxc_transformer"
 version = "0.30.1"
 dependencies = [
+ "assert-unchecked",
  "base64",
  "dashmap 6.0.1",
  "indexmap",
diff --git a/crates/oxc_transformer/Cargo.toml b/crates/oxc_transformer/Cargo.toml
index 54c8f0c689c1c6..b88c6fbfb4aebd 100644
--- a/crates/oxc_transformer/Cargo.toml
+++ b/crates/oxc_transformer/Cargo.toml
@@ -30,6 +30,7 @@ oxc_span = { workspace = true }
 oxc_syntax = { workspace = true, features = ["to_js_string"] }
 oxc_traverse = { workspace = true }
 
+assert-unchecked = { workspace = true }
 base64 = { workspace = true }
 dashmap = { workspace = true }
 indexmap = { workspace = true }
diff --git a/crates/oxc_transformer/src/helpers/stack/capacity.rs b/crates/oxc_transformer/src/helpers/stack/capacity.rs
new file mode 100644
index 00000000000000..50445d0ed8e122
--- /dev/null
+++ b/crates/oxc_transformer/src/helpers/stack/capacity.rs
@@ -0,0 +1,143 @@
+use std::mem::{align_of, size_of};
+
+/// Trait for defining maximum and default capacity of stacks.
+///
+/// `MAX_CAPACITY` and `MAX_CAPACITY_BYTES` being calculated correctly is required for soundness
+/// of stack types.
+pub trait StackCapacity {
+    /// Type that the stack contains
+    type Item: Sized;
+
+    /// Maximum capacity of stack.
+    ///
+    /// This is guaranteed to be a legal size for a stack of `Item`s, without exceeding Rust's
+    /// allocation size limits.
+    ///
+    /// From [`std::alloc::Layout`]'s docs:
+    /// > size, when rounded up to the nearest multiple of align, must not overflow `isize`
+    /// > (i.e., the rounded value must be less than or equal to `isize::MAX`).
+    const MAX_CAPACITY: usize = {
+        // This assertion is not needed as next line will cause a compile failure anyway
+        // if `size_of::<Self::Item>() == 0`, due to division by zero.
+        // But keep it anyway as soundness depends on it.
+        assert!(size_of::<Self::Item>() > 0, "Zero sized types are not supported");
+        // As `size_of::<T>()` is always a multiple of `align_of::<T>()` and `/` rounds down,
+        // this fulfills `Layout`'s alignment requirement
+        let max_capacity = isize::MAX as usize / size_of::<Self::Item>();
+        assert!(max_capacity > 0);
+        max_capacity
+    };
+
+    /// Maximum capacity of stack in bytes
+    const MAX_CAPACITY_BYTES: usize = {
+        let capacity_bytes = Self::MAX_CAPACITY * size_of::<Self::Item>();
+        // Just double-checking `Layout`'s alignment requirement is fulfilled
+        assert!(capacity_bytes <= isize::MAX as usize + 1 - align_of::<Self::Item>());
+        capacity_bytes
+    };
+
+    /// Default capacity of stack.
+    ///
+    /// Same defaults as [`std::vec::Vec`] uses.
+    const DEFAULT_CAPACITY: usize = {
+        // It's impossible for this to exceed `MAX_CAPACITY` because `size_of::<T>() >= align_of::<T>()`
+        match size_of::<Self::Item>() {
+            1 => 8,
+            size if size <= 1024 => 4,
+            _ => 1,
+        }
+    };
+
+    /// Default capacity of stack in bytes
+    const DEFAULT_CAPACITY_BYTES: usize = Self::DEFAULT_CAPACITY * size_of::<Self::Item>();
+}
+
+#[cfg(test)]
+#[allow(clippy::assertions_on_constants, clippy::int_plus_one)]
+mod tests {
+    use super::*;
+
+    const ISIZE_MAX: usize = isize::MAX as usize;
+    const ISIZE_MAX_PLUS_ONE: usize = ISIZE_MAX + 1;
+
+    #[test]
+    fn bool() {
+        struct TestStack;
+        impl StackCapacity for TestStack {
+            type Item = bool;
+        }
+        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX);
+        assert_eq!(TestStack::MAX_CAPACITY_BYTES, ISIZE_MAX);
+        assert_eq!(TestStack::DEFAULT_CAPACITY, 8);
+        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 8);
+    }
+
+    #[test]
+    fn u64() {
+        struct TestStack;
+        impl StackCapacity for TestStack {
+            type Item = u64;
+        }
+        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX / 8);
+        assert_eq!(TestStack::MAX_CAPACITY_BYTES, TestStack::MAX_CAPACITY * 8);
+        assert!(TestStack::MAX_CAPACITY_BYTES <= ISIZE_MAX_PLUS_ONE - 8);
+        assert_eq!(TestStack::DEFAULT_CAPACITY, 4);
+        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 32);
+    }
+
+    #[test]
+    fn u32_pair() {
+        struct TestStack;
+        impl StackCapacity for TestStack {
+            type Item = [u32; 2];
+        }
+        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX / 8);
+        assert_eq!(TestStack::MAX_CAPACITY_BYTES, TestStack::MAX_CAPACITY * 8);
+        assert!(TestStack::MAX_CAPACITY_BYTES <= ISIZE_MAX_PLUS_ONE - 4);
+        assert_eq!(TestStack::DEFAULT_CAPACITY, 4);
+        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 32);
+    }
+
+    #[test]
+    fn u32_triple() {
+        struct TestStack;
+        impl StackCapacity for TestStack {
+            type Item = [u32; 3];
+        }
+        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX / 12);
+        assert_eq!(TestStack::MAX_CAPACITY_BYTES, TestStack::MAX_CAPACITY * 12);
+        assert!(TestStack::MAX_CAPACITY_BYTES <= ISIZE_MAX_PLUS_ONE - 4);
+        assert_eq!(TestStack::DEFAULT_CAPACITY, 4);
+        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 48);
+    }
+
+    #[test]
+    fn large_low_alignment() {
+        struct TestStack;
+        impl StackCapacity for TestStack {
+            type Item = [u16; 1000];
+        }
+        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX / 2000);
+        assert_eq!(TestStack::MAX_CAPACITY_BYTES, TestStack::MAX_CAPACITY * 2000);
+        assert!(TestStack::MAX_CAPACITY_BYTES <= ISIZE_MAX_PLUS_ONE - 2);
+        assert_eq!(TestStack::DEFAULT_CAPACITY, 1);
+        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 2000);
+    }
+
+    #[test]
+    fn large_high_alignment() {
+        #[repr(align(4096))]
+        #[allow(dead_code)]
+        struct TestItem(u8);
+
+        struct TestStack;
+        impl StackCapacity for TestStack {
+            type Item = TestItem;
+        }
+        assert_eq!(TestStack::MAX_CAPACITY, ISIZE_MAX / 4096);
+        assert_eq!(TestStack::MAX_CAPACITY_BYTES, TestStack::MAX_CAPACITY * 4096);
+        assert!(TestStack::MAX_CAPACITY_BYTES <= ISIZE_MAX_PLUS_ONE - 4096);
+        assert_eq!(TestStack::DEFAULT_CAPACITY, 1);
+        assert_eq!(TestStack::DEFAULT_CAPACITY_BYTES, 4096);
+    }
+}
diff --git a/crates/oxc_transformer/src/helpers/stack/mod.rs b/crates/oxc_transformer/src/helpers/stack/mod.rs
index 04c462d88a9c1a..b81e94b93edbe0 100644
--- a/crates/oxc_transformer/src/helpers/stack/mod.rs
+++ b/crates/oxc_transformer/src/helpers/stack/mod.rs
@@ -1,3 +1,7 @@
+mod capacity;
+mod non_empty;
 mod sparse;
 
+use capacity::StackCapacity;
+pub use non_empty::NonEmptyStack;
 pub use sparse::SparseStack;
diff --git a/crates/oxc_transformer/src/helpers/stack/non_empty.rs b/crates/oxc_transformer/src/helpers/stack/non_empty.rs
new file mode 100644
index 00000000000000..2c338f2ec6d9c3
--- /dev/null
+++ b/crates/oxc_transformer/src/helpers/stack/non_empty.rs
@@ -0,0 +1,616 @@
+#![expect(clippy::unnecessary_safety_comment)]
+
+use std::{
+    alloc::{self, Layout},
+    mem::{align_of, size_of},
+    ptr::{self, NonNull},
+};
+
+use assert_unchecked::assert_unchecked;
+
+use super::StackCapacity;
+
+/// A stack which can never be empty.
+///
+/// `NonEmptyStack` is created initially with 1 entry, and `pop` does not allow removing it
+/// (though that initial entry can be mutated with `last_mut`).
+///
+/// The fact that the stack is never empty makes all operations except `pop` infallible.
+/// `last` and `last_mut` are branchless.
+///
+/// The trade-off is that you cannot create a `NonEmptyStack` without allocating (unlike `Vec`).
+///
+/// To simplify implementation, zero size types are not supported (e.g. `NonEmptyStack<()>`).
+///
+/// ## Design
+/// Designed for maximally efficient `push`, `pop`, and reading/writing the last value on stack.
+///
+/// The alternative would likely be to use a `Vec`. But `Vec` is optimized for indexing at
+/// arbitrary positions, not for `push` and `pop`. `Vec` stores `len` and `capacity` as integers,
+/// so requires pointer maths on every operation: `let entry_ptr = base_ptr + index * size_of::<T>();`.
+///
+/// In comparison, `NonEmptyStack` contains a `cursor` pointer, which always points to last entry
+/// on stack, so it can be read/written with a minimum of operations.
+///
+/// This design is similar to `std`'s slice iterator.
+///
+/// Comparison to `Vec`:
+/// * `last` and `last_mut` are 1 instruction, instead of `Vec`'s 4.
+/// * `pop` is 1 instruction shorter than `Vec`'s equivalent.
+/// * `push` is 1 instruction shorter than `Vec`'s equivalent, and uses 1 less register.
+///
+/// ### Possible alternative designs
+/// 1. `cursor` could point to *after* last entry, rather than *to* it. This has advantage that `pop`
+///    uses 1 less register, but disadvantage that `last` and `last_mut` are 2 instructions, not 1.
+///    https://godbolt.org/z/xnx7YP5de
+///
+/// 2. Stack could grow downwards, like `bumpalo` allocator does. This would probably make `pop` use
+///    1 less register, but at the cost that the stack can never grow in place, which would incur more
+///    memory copies when the stack grows.
+pub struct NonEmptyStack<T> {
+    /// Pointer to last entry on stack.
+    /// Points *to* last entry, not *after* last entry.
+    cursor: NonNull<T>,
+    /// Pointer to start of allocation (first entry)
+    start: NonNull<T>,
+    /// Pointer to end of allocation
+    end: NonNull<T>,
+}
+
+impl<T> StackCapacity for NonEmptyStack<T> {
+    type Item = T;
+}
+
+impl<T> NonEmptyStack<T> {
+    /// Maximum capacity.
+    ///
+    /// Effectively unlimited on 64-bit systems.
+    #[allow(dead_code)]
+    pub const MAX_CAPACITY: usize = <Self as StackCapacity>::MAX_CAPACITY;
+
+    /// Create new [`NonEmptyStack`] with default pre-allocated capacity, and initial value `initial_value`.
+    ///
+    /// # Panics
+    /// Panics if `T` is a zero-sized type.
+    #[inline]
+    pub fn new(initial_value: T) -> Self {
+        // SAFETY: `DEFAULT_CAPACITY_BYTES` satisfies requirements
+        unsafe {
+            Self::new_with_capacity_bytes_unchecked(Self::DEFAULT_CAPACITY_BYTES, initial_value)
+        }
+    }
+
+    /// Create new [`NonEmptyStack`] with pre-allocated capacity for `capacity` entries,
+    /// and initial value `initial_value`.
+    ///
+    /// `capacity` cannot be 0.
+    ///
+    /// # Panics
+    /// Panics if any of these requirements are not satisfied:
+    /// * `T` must not be a zero-sized type.
+    /// * `capacity` must not be 0.
+    /// * `capacity` must not exceed [`Self::MAX_CAPACITY`].
+    #[inline]
+    #[cfg_attr(not(test), expect(dead_code))]
+    pub fn with_capacity(capacity: usize, initial_value: T) -> Self {
+        assert!(capacity > 0, "`capacity` cannot be zero");
+        assert!(capacity <= Self::MAX_CAPACITY, "`capacity` must not exceed `Self::MAX_CAPACITY`");
+        // SAFETY: Assertions above ensure `capacity` satisfies requirements
+        unsafe { Self::with_capacity_unchecked(capacity, initial_value) }
+    }
+
+    /// Create new [`NonEmptyStack`] with pre-allocated capacity for `capacity` entries,
+    /// and initial value `initial_value`, without checks.
+    ///
+    /// `capacity` cannot be 0.
+    ///
+    /// # Panics
+    /// Panics if `T` is a zero-sized type.
+    ///
+    /// # SAFETY
+    /// * `capacity` must not be 0.
+    /// * `capacity` must not exceed [`Self::MAX_CAPACITY`].
+    #[inline]
+    pub unsafe fn with_capacity_unchecked(capacity: usize, initial_value: T) -> Self {
+        debug_assert!(capacity > 0);
+        debug_assert!(capacity <= Self::MAX_CAPACITY);
+        // Cannot overflow if `capacity <= MAX_CAPACITY`
+        let capacity_bytes = capacity * size_of::<T>();
+        // SAFETY: Safety invariants which caller must satisfy guarantee that `capacity_bytes`
+        // satisfies requirements
+        Self::new_with_capacity_bytes_unchecked(capacity_bytes, initial_value)
+    }
+
+    /// Create new [`NonEmptyStack`] with provided capacity in bytes, and initial value `initial_value`,
+    /// without checks.
+    ///
+    /// # Panics
+    /// Panics if `T` is a zero-sized type.
+    ///
+    /// # SAFETY
+    /// * `capacity_bytes` must not be 0.
+    /// * `capacity_bytes` must be a multiple of `mem::size_of::<T>()`.
+    /// * `capacity_bytes` must not exceed [`Self::MAX_CAPACITY_BYTES`].
+    #[inline]
+    unsafe fn new_with_capacity_bytes_unchecked(capacity_bytes: usize, initial_value: T) -> Self {
+        // ZSTs are not supported for simplicity
+        assert!(size_of::<T>() > 0, "Zero sized types are not supported");
+
+        // SAFETY: Caller guarantees `capacity_bytes` satisfies requirements
+        let layout = Self::layout_for(capacity_bytes);
+        let ptr = alloc::alloc(layout);
+        if ptr.is_null() {
+            alloc::handle_alloc_error(layout);
+        }
+        // `layout_for` produces a layout with `T`'s alignment, so `ptr` is aligned for `T`
+        let ptr = ptr.cast::<T>();
+
+        // SAFETY: We checked `ptr` is non-null
+        let start = NonNull::new_unchecked(ptr);
+        // SAFETY: We allocated `capacity_bytes` bytes, so `end` is end of allocation
+        let end = NonNull::new_unchecked(ptr.byte_add(capacity_bytes));
+
+        // Write initial value to start of allocation.
+        // SAFETY: Allocation was created with alignment of `T`, and with capacity for at least 1 entry,
+        // so `start` is valid for writing a `T`.
+        start.as_ptr().write(initial_value);
+
+        // `cursor` is positioned at start i.e. pointing at initial value
+        Self { cursor: start, start, end }
+    }
+
+    /// Get layout for allocation of `capacity_bytes` bytes.
+    ///
+    /// # SAFETY
+    /// * `capacity_bytes` must not be 0.
+    /// * `capacity_bytes` must be a multiple of `mem::size_of::<T>()`.
+    /// * `capacity_bytes` must not exceed [`Self::MAX_CAPACITY_BYTES`].
+    #[inline]
+    unsafe fn layout_for(capacity_bytes: usize) -> Layout {
+        // `capacity_bytes` must not be 0 because stack can never be empty.
+        debug_assert!(capacity_bytes > 0);
+        // `capacity_bytes` must be a multiple of `size_of::<T>()` so that `new_cursor == self.end`
+        // check in `push` accurately detects when full to capacity
+        debug_assert!(capacity_bytes % size_of::<T>() == 0);
+        // `capacity_bytes` must not exceed `Self::MAX_CAPACITY_BYTES` to prevent creating an allocation
+        // of illegal size
+        debug_assert!(capacity_bytes <= Self::MAX_CAPACITY_BYTES);
+
+        // SAFETY: `align_of::<T>()` trivially satisfies alignment requirements.
+        // Caller guarantees `capacity_bytes <= MAX_CAPACITY_BYTES`.
+        // `MAX_CAPACITY_BYTES` takes into account the rounding-up by alignment requirement.
+        Layout::from_size_align_unchecked(capacity_bytes, align_of::<T>())
+    }
+
+    /// Get reference to last value on stack.
+    #[inline]
+    pub fn last(&self) -> &T {
+        // SAFETY: All methods ensure `self.cursor` is always in bounds, is aligned for `T`,
+        // and points to a valid initialized `T`
+        unsafe { self.cursor.as_ref() }
+    }
+
+    /// Get mutable reference to last value on stack.
+    #[inline]
+    pub fn last_mut(&mut self) -> &mut T {
+        // SAFETY: All methods ensure `self.cursor` is always in bounds, is aligned for `T`,
+        // and points to a valid initialized `T`
+        unsafe { self.cursor.as_mut() }
+    }
+
+    /// Push value to stack.
+    ///
+    /// # Panics
+    /// Panics if stack is already filled to maximum capacity.
+    #[inline]
+    pub fn push(&mut self, value: T) {
+        // SAFETY: Stack is never empty and `self.cursor` is always less than `self.end`, which is end
+        // of allocation. So advancing by a `T` cannot be out of bounds.
+        // The distance between `self.cursor` and `self.end` is always a multiple of `size_of::<T>()`,
+        // so `==` check is sufficient to detect when full to capacity.
+        let new_cursor = unsafe { NonNull::new_unchecked(self.cursor.as_ptr().add(1)) };
+        if new_cursor == self.end {
+            // Needs to grow
+            // SAFETY: Stack is full to capacity
+            unsafe { self.push_slow(value) };
+        } else {
+            // Capacity for at least 1 more entry
+            self.cursor = new_cursor;
+            // SAFETY: We checked there is capacity for 1 more entry, so `self.cursor` is in bounds.
+            // `self.cursor` was aligned for `T`, and we added `size_of::<T>()` to pointer.
+            // `size_of::<T>()` is always a multiple of `T`'s alignment, so `self.cursor` must still be
+            // aligned for `T`.
+            unsafe { self.cursor.as_ptr().write(value) };
+        }
+    }
+
+    /// Push value to stack when stack is full to capacity.
+    ///
+    /// This is the slow branch of `push`, which is rarely taken, so marked as `#[cold]` and
+    /// `#[inline(never)]` to make `push` as small as possible, so it can be inlined.
+    ///
+    /// # Panics
+    /// Panics if stack is already at maximum capacity.
+    ///
+    /// # SAFETY
+    /// Stack must be full to capacity. i.e. `self.cursor.add(1) == self.end`.
+    #[cold]
+    #[inline(never)]
+    unsafe fn push_slow(&mut self, value: T) {
+        // Get new capacity
+        let old_capacity_bytes = self.capacity_bytes();
+        // Capacity in bytes cannot be larger than `isize::MAX`, so `* 2` cannot overflow
+        let mut new_capacity_bytes = old_capacity_bytes * 2;
+        if new_capacity_bytes > Self::MAX_CAPACITY_BYTES {
+            assert!(
+                old_capacity_bytes < Self::MAX_CAPACITY_BYTES,
+                "Cannot grow beyond `Self::MAX_CAPACITY`"
+            );
+            new_capacity_bytes = Self::MAX_CAPACITY_BYTES;
+        }
+        debug_assert!(new_capacity_bytes > old_capacity_bytes);
+
+        // Reallocate.
+        // SAFETY:
+        // Stack is always allocated, and `self.start` and `self.end` are boundaries of that allocation.
+        // So `self.start` and `old_layout` accurately describe the current allocation.
+        // `old_capacity_bytes` was a multiple of `size_of::<T>()`, so double that must be too.
+        // `MAX_CAPACITY_BYTES` is also a multiple of `size_of::<T>()`.
+        // So `new_capacity_bytes` must be a multiple of `size_of::<T>()`.
+        // `new_capacity_bytes` is `<= MAX_CAPACITY_BYTES`, so is a legal allocation size.
+        // `layout_for` produces a layout with `T`'s alignment, so `new_ptr` is aligned for `T`.
+        let new_ptr = unsafe {
+            let old_ptr = self.start.as_ptr().cast::<u8>();
+            let old_layout = Self::layout_for(old_capacity_bytes);
+            let new_ptr = alloc::realloc(old_ptr, old_layout, new_capacity_bytes);
+            if new_ptr.is_null() {
+                let new_layout = Self::layout_for(new_capacity_bytes);
+                alloc::handle_alloc_error(new_layout);
+            }
+            new_ptr.cast::<T>()
+        };
+
+        // Update pointers.
+        // Stack was full to capacity, so new last index after push is the old capacity.
+        // i.e. `self.cursor - self.start == old_end - old_start`.
+        // Note: All pointers need to be updated even if allocation grew in place.
+        // From docs for `GlobalAlloc::realloc`:
+        // "Any access to the old `ptr` is Undefined Behavior, even if the allocation remained in-place."
+        // <https://doc.rust-lang.org/std/alloc/trait.GlobalAlloc.html#method.realloc>
+        // `end` changes whatever happens, so always needs to be updated.
+        // `cursor` needs to be derived from `start` to make `offset_from` valid, so also needs updating.
+        // SAFETY: We checked that `new_ptr` is non-null.
+        // `old_capacity_bytes` and `new_capacity_bytes` are both multiples of `size_of::<T>()`.
+        // `size_of::<T>()` is always a multiple of `T`'s alignment, and `new_ptr` is aligned for `T`,
+        // so new `self.cursor` and `self.end` are aligned for `T`.
+        // `old_capacity_bytes` is always `< new_capacity_bytes`, so new `self.cursor` must be in bounds.
+        unsafe {
+            self.start = NonNull::new_unchecked(new_ptr);
+            self.end = NonNull::new_unchecked(new_ptr.byte_add(new_capacity_bytes));
+            self.cursor = NonNull::new_unchecked(new_ptr.byte_add(old_capacity_bytes));
+        }
+
+        // Write value.
+        // SAFETY: We just allocated additional capacity, so `self.cursor` is in bounds.
+        // `self.cursor` is aligned for `T`.
+        unsafe { self.cursor.as_ptr().write(value) }
+    }
+
+    /// Pop value from stack.
+    ///
+    /// # Panics
+    /// Panics if the stack has only 1 entry on it.
+    #[inline]
+    pub fn pop(&mut self) -> T {
+        // Panic if trying to remove last entry from stack
+        assert!(self.cursor != self.start, "Cannot pop all entries");
+
+        // SAFETY: Assertion above ensures stack has at least 2 entries
+        unsafe { self.pop_unchecked() }
+    }
+
+    /// Pop value from stack, without checking that stack has more than 1 entry.
+    ///
+    /// # SAFETY
+    /// Stack must have at least 2 entries, so that after pop, it still has at least 1.
+    #[inline]
+    pub unsafe fn pop_unchecked(&mut self) -> T {
+        debug_assert!(self.cursor > self.start);
+        debug_assert!(self.cursor < self.end);
+        // SAFETY: All methods ensure `self.cursor` is always in bounds, is aligned for `T`,
+        // and points to a valid initialized `T`
+        let value = self.cursor.as_ptr().read();
+        // SAFETY: Caller guarantees there's at least 2 entries on stack, so subtracting 1
+        // cannot be out of bounds
+        self.cursor = NonNull::new_unchecked(self.cursor.as_ptr().sub(1));
+        value
+    }
+
+    /// Get number of values on stack.
+    ///
+    /// Number of entries is always at least 1. Stack is never empty.
+    #[inline]
+    pub fn len(&self) -> usize {
+        // `offset_from` returns offset in units of `T`.
+        // When stack has 1 entry, `cursor - start == 0`, so add 1 to get number of entries.
+        // SAFETY: `self.start` and `self.cursor` are both derived from same pointer
+        // (in `new_with_capacity_bytes_unchecked` and `push_slow`).
+        // Both pointers are always within bounds of a single allocation.
+        // Distance between pointers is always a multiple of `size_of::<T>()`.
+        // Byte size of allocation cannot exceed `isize::MAX`, so `+ 1` cannot wrap around.
+        // `self.cursor` is always >= `self.start`.
+        // `assert_unchecked!` is to help compiler to optimize.
+        // See: https://doc.rust-lang.org/std/primitive.pointer.html#method.sub_ptr
+        #[expect(clippy::cast_sign_loss)]
+        unsafe {
+            assert_unchecked!(self.cursor >= self.start);
+            self.cursor.as_ptr().offset_from(self.start.as_ptr()) as usize + 1
+        }
+    }
+
+    /// Get capacity.
+    #[inline]
+    #[cfg_attr(not(test), expect(dead_code))]
+    pub fn capacity(&self) -> usize {
+        // SAFETY: `self.start` and `self.end` are both derived from same pointer
+        // (in `new_with_capacity_bytes_unchecked` and `push_slow`).
+        // Both pointers are always within bounds of single allocation.
+        // Distance between pointers is always a multiple of `size_of::<T>()`.
+        // `self.end` is always > `self.start`, because stack is never empty.
+        // `assert_unchecked!` is to help compiler to optimize.
+        // See: https://doc.rust-lang.org/std/primitive.pointer.html#method.sub_ptr
+        #[expect(clippy::cast_sign_loss)]
+        unsafe {
+            assert_unchecked!(self.end > self.start);
+            self.end.as_ptr().offset_from(self.start.as_ptr()) as usize
+        }
+    }
+
+    /// Get capacity in bytes.
+    #[inline]
+    fn capacity_bytes(&self) -> usize {
+        // SAFETY: `self.start` and `self.end` are both derived from same pointer
+        // (in `new_with_capacity_bytes_unchecked` and `push_slow`).
+        // Both pointers are always within bounds of single allocation.
+        // Distance between pointers is always a multiple of `size_of::<T>()`.
+        // `self.end` is always > `self.start`, because stack is never empty.
+        // `assert_unchecked!` is to help compiler to optimize.
+        // See: https://doc.rust-lang.org/std/primitive.pointer.html#method.sub_ptr
+        #[expect(clippy::cast_sign_loss)]
+        unsafe {
+            assert_unchecked!(self.end > self.start);
+            self.end.as_ptr().byte_offset_from(self.start.as_ptr()) as usize
+        }
+    }
+}
+
+impl<T> Drop for NonEmptyStack<T> {
+    fn drop(&mut self) {
+        // Drop contents. This block copied from `std`'s `Vec`.
+        // Will be optimized out if `T` is non-drop, as `drop_in_place` calls `std::mem::needs_drop`.
+        // SAFETY: Stack contains `self.len()` initialized entries, starting at `self.start`.
+        unsafe {
+            ptr::drop_in_place(ptr::slice_from_raw_parts_mut(self.start.as_ptr(), self.len()));
+        }
+
+        // Drop the memory
+        // SAFETY:
+        // Stack is always allocated, and `self.start` and `self.end` are boundaries of that allocation.
+        // So `self.start` and `layout` accurately describe the current allocation.
+        unsafe {
+            let layout = Self::layout_for(self.capacity_bytes());
+            alloc::dealloc(self.start.as_ptr().cast::<u8>(), layout);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    macro_rules! assert_len_cap_last {
+        ($stack:ident, $len:expr, $capacity:expr, $last:expr) => {
+            assert_eq!($stack.len(), $len);
+            assert_eq!($stack.capacity(), $capacity);
+            assert_eq!($stack.last(), $last);
+        };
+    }
+
+    #[test]
+    fn new() {
+        let stack = NonEmptyStack::new(true);
+        assert_len_cap_last!(stack, 1, 8, &true);
+        assert_eq!(stack.capacity_bytes(), 8);
+
+        let stack = NonEmptyStack::new(10u64);
+        assert_len_cap_last!(stack, 1, 4, &10);
+        assert_eq!(stack.capacity_bytes(), 32);
+
+        let stack = NonEmptyStack::new([10u8; 1024]);
+        assert_len_cap_last!(stack, 1, 4, &[10; 1024]);
+        assert_eq!(stack.capacity_bytes(), 4096);
+
+        let stack = NonEmptyStack::new([10u8; 1025]);
+        assert_len_cap_last!(stack, 1, 1, &[10; 1025]);
+        assert_eq!(stack.capacity_bytes(), 1025);
+    }
+
+    #[test]
+    fn with_capacity() {
+        let stack = NonEmptyStack::with_capacity(16, 10u64);
+        assert_len_cap_last!(stack, 1, 16, &10);
+        assert_eq!(stack.capacity_bytes(), 128);
+    }
+
+    #[test]
+    #[should_panic(expected = "`capacity` cannot be zero")]
+    fn with_capacity_zero() {
+        NonEmptyStack::with_capacity(0, 10u64);
+    }
+
+    #[test]
+    fn push_then_pop() {
+        let mut stack = NonEmptyStack::new(10u64);
+        assert_len_cap_last!(stack, 1, 4, &10);
+        assert_eq!(stack.capacity_bytes(), 32);
+
+        stack.push(20);
+        assert_len_cap_last!(stack, 2, 4, &20);
+        stack.push(30);
+        assert_len_cap_last!(stack, 3, 4, &30);
+
+        stack.push(40);
+        assert_len_cap_last!(stack, 4, 4, &40);
+        assert_eq!(stack.capacity_bytes(), 32);
+        stack.push(50);
+        assert_len_cap_last!(stack, 5, 8, &50);
+        assert_eq!(stack.capacity_bytes(), 64);
+
+        stack.push(60);
+        assert_len_cap_last!(stack, 6, 8, &60);
+        stack.push(70);
+        assert_len_cap_last!(stack, 7, 8, &70);
+
+        stack.push(80);
+        assert_len_cap_last!(stack, 8, 8, &80);
+        assert_eq!(stack.capacity_bytes(), 64);
+
+        stack.push(90);
+        assert_len_cap_last!(stack, 9, 16, &90);
+        assert_eq!(stack.capacity_bytes(), 128);
+
+        assert_eq!(stack.pop(), 90);
+        assert_len_cap_last!(stack, 8, 16, &80);
+        assert_eq!(stack.pop(), 80);
+        assert_len_cap_last!(stack, 7, 16, &70);
+        assert_eq!(stack.pop(), 70);
+        assert_len_cap_last!(stack, 6, 16, &60);
+        assert_eq!(stack.pop(), 60);
+        assert_len_cap_last!(stack, 5, 16, &50);
+        assert_eq!(stack.pop(), 50);
+        assert_len_cap_last!(stack, 4, 16, &40);
+        assert_eq!(stack.pop(), 40);
+        assert_len_cap_last!(stack, 3, 16, &30);
+        assert_eq!(stack.pop(), 30);
+        assert_len_cap_last!(stack, 2, 16, &20);
+        assert_eq!(stack.pop(), 20);
+        assert_len_cap_last!(stack, 1, 16, &10);
+        assert_eq!(stack.capacity_bytes(), 128);
+    }
+
+    #[test]
+    fn push_and_pop_mixed() {
+        let mut stack = NonEmptyStack::new(10u64);
+        assert_len_cap_last!(stack, 1, 4, &10);
+        assert_eq!(stack.capacity_bytes(), 32);
+
+        stack.push(20);
+        assert_len_cap_last!(stack, 2, 4, &20);
+        stack.push(30);
+        assert_len_cap_last!(stack, 3, 4, &30);
+
+        assert_eq!(stack.pop(), 30);
+        assert_len_cap_last!(stack, 2, 4, &20);
+
+        stack.push(31);
+        assert_len_cap_last!(stack, 3, 4, &31);
+        stack.push(40);
+        assert_len_cap_last!(stack, 4, 4, &40);
+        stack.push(50);
+        assert_len_cap_last!(stack, 5, 8, &50);
+
+        assert_eq!(stack.pop(), 50);
+        assert_len_cap_last!(stack, 4, 8, &40);
+        assert_eq!(stack.pop(), 40);
+        assert_len_cap_last!(stack, 3, 8, &31);
+        assert_eq!(stack.pop(), 31);
+        assert_len_cap_last!(stack, 2, 8, &20);
+
+        stack.push(32);
+        assert_len_cap_last!(stack, 3, 8, &32);
+
+        assert_eq!(stack.pop(), 32);
+        assert_len_cap_last!(stack, 2, 8, &20);
+        assert_eq!(stack.pop(), 20);
+        assert_len_cap_last!(stack, 1, 8, &10);
+    }
+
+    #[test]
+    #[should_panic(expected = "Cannot pop all entries")]
+    fn pop_panic() {
+        let mut stack = NonEmptyStack::new(10u64);
+        stack.pop();
+    }
+
+    #[test]
+    #[should_panic(expected = "Cannot pop all entries")]
+    fn pop_panic2() {
+        let mut stack = NonEmptyStack::new(10u64);
+        stack.push(20);
+        stack.push(30);
+        stack.pop();
+        stack.pop();
+        stack.pop();
+    }
+
+    #[test]
+    fn last_mut() {
+        let mut stack = NonEmptyStack::new(10u64);
+        assert_len_cap_last!(stack, 1, 4, &10);
+
+        *stack.last_mut() = 11;
+        assert_len_cap_last!(stack, 1, 4, &11);
+        *stack.last_mut() = 12;
+        assert_len_cap_last!(stack, 1, 4, &12);
+
+        stack.push(20);
+        assert_len_cap_last!(stack, 2, 4, &20);
+        *stack.last_mut() = 21;
+        assert_len_cap_last!(stack, 2, 4, &21);
+        *stack.last_mut() = 22;
+        assert_len_cap_last!(stack, 2, 4, &22);
+    }
+
+    #[test]
+    #[expect(clippy::items_after_statements)]
+    fn drop() {
+        use std::sync::{Mutex, OnceLock};
+
+        static DROPS: OnceLock<Mutex<Vec<u32>>> = OnceLock::new();
+        DROPS.get_or_init(|| Mutex::new(vec![]));
+
+        fn drops() -> Vec<u32> {
+            std::mem::take(DROPS.get().unwrap().lock().unwrap().as_mut())
+        }
+
+        #[derive(PartialEq, Debug)]
+        struct Droppy(u32);
+
+        impl Drop for Droppy {
+            fn drop(&mut self) {
+                DROPS.get().unwrap().lock().unwrap().push(self.0);
+            }
+        }
+
+        {
+            let mut stack = NonEmptyStack::new(Droppy(10));
+            stack.push(Droppy(20));
+            stack.push(Droppy(30));
+            assert_eq!(stack.len(), 3);
+            assert_eq!(stack.capacity(), 4);
+
+            stack.pop();
+            assert_eq!(drops(), &[30]);
+            assert!(drops().is_empty());
+
+            stack.push(Droppy(31));
+            stack.push(Droppy(40));
+            stack.push(Droppy(50));
+            assert_eq!(stack.len(), 5);
+            assert_eq!(stack.capacity(), 8);
+            assert!(drops().is_empty());
+        }
+
+        assert_eq!(drops(), &[10, 20, 31, 40, 50]);
+    }
+}
diff --git a/crates/oxc_transformer/src/helpers/stack/sparse.rs b/crates/oxc_transformer/src/helpers/stack/sparse.rs
index bb768ce61b6376..13b33752134bd8 100644
--- a/crates/oxc_transformer/src/helpers/stack/sparse.rs
+++ b/crates/oxc_transformer/src/helpers/stack/sparse.rs
@@ -1,3 +1,5 @@
+use super::NonEmptyStack;
+
 /// Stack which is sparsely filled.
 ///
 /// Functionally equivalent to a stack implemented as `Vec<Option<T>>`, but more memory-efficient
@@ -22,7 +24,7 @@
 /// When the stack grows and reallocates, `SparseStack` has less memory to copy, which is a performance
 /// win too.
 pub struct SparseStack<T> {
-    has_values: Vec<bool>,
+    has_values: NonEmptyStack<bool>,
     values: Vec<T>,
 }
 
@@ -32,7 +34,7 @@ impl<T> SparseStack<T> {
         // `has_values` starts with a single empty entry, which will never be popped off.
         // This means `take_last`, `last_or_init`, and `last_mut_or_init` can all be infallible,
         // as there's always an entry on the stack to read.
-        Self { has_values: vec![false], values: vec![] }
+        Self { has_values: NonEmptyStack::new(false), values: vec![] }
     }
 
     /// Push an entry to the stack.
@@ -53,25 +55,7 @@ impl<T> SparseStack<T> {
     /// Panics if the stack has only 1 entry on it.
     #[inline]
     pub fn pop(&mut self) -> Option<T> {
-        // SAFETY: `self.has_values` starts with 1 entry. Only `pop` removes entries from it.
-        // We check that popping an entry does not leave the stack empty before performing the pop.
-        // So `self.has_values` can never be left in an empty state.
-        //
-        // This would be equivalent:
-        // ```
-        // assert!(self.has_values.len() > 1);
-        // self.has_values.pop().unwrap()
-        // ```
-        // But checking `original_len > 1` is 1 more CPU op than decrementing length first,
-        // and then checking for `new_len > 0`. https://godbolt.org/z/eqx385E5K
-        let has_value = unsafe {
-            let new_len = self.has_values.len() - 1;
-            assert!(new_len > 0);
-            let has_value = *self.has_values.get_unchecked(new_len);
-            self.has_values.set_len(new_len);
-            has_value
-        };
-
+        let has_value = self.has_values.pop();
         if has_value {
             debug_assert!(!self.values.is_empty());
             // SAFETY: Last `self.has_values` is only `true` if there's a corresponding value in `self.values`.
@@ -88,10 +72,7 @@ impl<T> SparseStack<T> {
     /// Get value of last entry on the stack.
     #[inline]
     pub fn last(&self) -> Option<&T> {
-        debug_assert!(!self.has_values.is_empty());
-        // SAFETY: `self.has_values` starts with 1 entry. Only `pop` removes entries from it,
-        // and it ensures `self.has_values` always has at least 1 entry.
-        let has_value = unsafe { *self.has_values.last().unwrap_unchecked() };
+        let has_value = *self.has_values.last();
         if has_value {
             debug_assert!(!self.values.is_empty());
             // SAFETY: Last `self.has_values` is only `true` if there's a corresponding value in `self.values`.
@@ -106,10 +87,7 @@ impl<T> SparseStack<T> {
     /// Take value from last entry on the stack, leaving last entry empty.
     #[inline]
     pub fn take_last(&mut self) -> Option<T> {
-        debug_assert!(!self.has_values.is_empty());
-        // SAFETY: `self.has_values` starts with 1 entry. Only `pop` removes entries from it,
-        // and it ensures `self.has_values` always has at least 1 entry.
-        let has_value = unsafe { self.has_values.last_mut().unwrap_unchecked() };
+        let has_value = self.has_values.last_mut();
         if *has_value {
             *has_value = false;
 
@@ -129,10 +107,7 @@ impl<T> SparseStack<T> {
     /// Return reference to value.
     #[inline]
     pub fn last_or_init<I: FnOnce() -> T>(&mut self, init: I) -> &T {
-        debug_assert!(!self.has_values.is_empty());
-        // SAFETY: `self.has_values` starts with 1 entry. Only `pop` removes entries from it,
-        // and it ensures `self.has_values` always has at least 1 entry.
-        let has_value = unsafe { self.has_values.last_mut().unwrap_unchecked() };
+        let has_value = self.has_values.last_mut();
         if !*has_value {
             *has_value = true;
             self.values.push(init());
@@ -150,10 +125,7 @@ impl<T> SparseStack<T> {
     /// Return mutable reference to value.
     #[inline]
     pub fn last_mut_or_init<I: FnOnce() -> T>(&mut self, init: I) -> &mut T {
-        debug_assert!(!self.has_values.is_empty());
-        // SAFETY: `self.has_values` starts with 1 entry. Only `pop` removes entries from it,
-        // and it ensures `self.has_values` always has at least 1 entry.
-        let has_value = unsafe { self.has_values.last_mut().unwrap_unchecked() };
+        let has_value = self.has_values.last_mut();
         if !*has_value {
             *has_value = true;
             self.values.push(init());