Skip to content

Commit

Permalink
Optimize list<u8> lifting and lowering (bytecodealliance#6971)
Browse files Browse the repository at this point in the history
This commit optimizes the lifting and lowering routines for `list<u8>`
and other similar primitive integer types. The current lifting/lowering
is intended to be general-purpose and correct but doesn't optimize well
with LLVM for a number of reasons. I first attempted to reshape the
general-purpose code to be easier for LLVM to optimize but in the end
was unable to convince LLVM that various pointers here don't alias which
meant that the general-purpose lowering/lifting never optimized well.

Instead, however, I've added new trait methods that are implemented the
same way as the previous general-purpose methods. The
integer/primitive implementations then override these default methods with
more specialized versions that take advantage of knowledge about primitives.

On a local benchmark this makes lifting/lowering disappear from a
profile since memcpy is generally much faster than per-item processing.
  • Loading branch information
alexcrichton authored and eduardomourar committed Sep 6, 2023
1 parent ee929b7 commit fb26c56
Showing 1 changed file with 95 additions and 8 deletions.
103 changes: 95 additions & 8 deletions crates/wasmtime/src/component/func/typed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,31 @@ pub unsafe trait Lower: ComponentType {
ty: InterfaceType,
offset: usize,
) -> Result<()>;

/// Provided method which lowers an entire list of `Self` values into
/// contiguous memory starting at `offset`.
///
/// The caller must have already validated `offset` for alignment and
/// bounds; violations may cause a panic here.
///
/// This exists as a provided method so integer implementations can
/// override it with a bulk-copy pattern that LLVM optimizes far better
/// than the element-at-a-time default below.
#[doc(hidden)]
fn store_list<T>(
    cx: &mut LowerContext<'_, T>,
    ty: InterfaceType,
    offset: usize,
    items: &[Self],
) -> Result<()>
where
    Self: Sized,
{
    // Default path: lower each element into its own `SIZE32`-byte slot,
    // short-circuiting on the first failure.
    items
        .iter()
        .enumerate()
        .try_for_each(|(index, item)| item.store(cx, ty, offset + index * Self::SIZE32))
}
}

/// Host types which can be created from the canonical ABI.
Expand Down Expand Up @@ -547,6 +572,21 @@ pub unsafe trait Lift: Sized + ComponentType {
/// for `Op::Lift` this needs to be overridden.
#[doc(hidden)]
fn load(cx: &mut LiftContext<'_>, ty: InterfaceType, bytes: &[u8]) -> Result<Self>;

/// Converts `list` into a `Vec<T>`, used in `Lift for Vec<T>`.
///
/// This is primarily here to get overridden for implementations of integers
/// which can avoid some extra fluff and use a pattern that's more easily
/// optimizable by LLVM.
#[doc(hidden)]
fn load_list(cx: &mut LiftContext<'_>, list: &WasmList<Self>) -> Result<Vec<Self>>
where
    Self: Sized,
{
    // Default path: lift every element one at a time and collect,
    // short-circuiting on the first element that fails to lift.
    // The `unwrap` is on the index lookup only — every index in
    // `0..list.len` is in bounds for `list` by construction.
    // NOTE(review): assumes `get_from_store` returns an `Option`
    // wrapping the per-element lift result, with `None` reserved for
    // out-of-range indices — confirm against `WasmList`'s definition.
    (0..list.len)
        .map(|index| list.get_from_store(cx, index).unwrap())
        .collect()
}
}

// Macro to help generate "forwarding implementations" of `ComponentType` to
Expand Down Expand Up @@ -636,12 +676,12 @@ macro_rules! forward_list_lifts {
unsafe impl <T: Lift> Lift for $a {
    /// Lifts a list from its flat core-wasm representation by first
    /// lifting the raw `WasmList` view and then materializing it into
    /// the owned container `$a` via the (possibly specialized)
    /// `T::load_list` bulk path.
    //
    // NOTE: the scraped diff retained both the pre-change body
    // (`(0..list.len).map(...).collect()`) and its replacement as two
    // consecutive tail expressions, which is not valid Rust; only the
    // post-commit line calling `T::load_list` is kept here.
    fn lift(cx: &mut LiftContext<'_>, ty: InterfaceType, src: &Self::Lower) -> Result<Self> {
        let list = <WasmList::<T> as Lift>::lift(cx, ty, src)?;
        Ok(T::load_list(cx, &list)?.into())
    }

    /// Same as `lift` above, except the list descriptor is read out of
    /// linear memory (`bytes`) rather than from flat core values.
    fn load(cx: &mut LiftContext<'_>, ty: InterfaceType, bytes: &[u8]) -> Result<Self> {
        let list = <WasmList::<T> as Lift>::load(cx, ty, bytes)?;
        Ok(T::load_list(cx, &list)?.into())
    }
}
)*)
Expand Down Expand Up @@ -694,6 +734,43 @@ macro_rules! integers {
*cx.get(offset) = self.to_le_bytes();
Ok(())
}

/// Specialized bulk lowering for integer lists: reinterprets the
/// destination window of guest memory as `[Self]` and copies each item
/// with a per-element endian conversion, which LLVM can collapse into a
/// single memcpy on little-endian hosts.
fn store_list<T>(
    cx: &mut LowerContext<'_, T>,
    ty: InterfaceType,
    offset: usize,
    items: &[Self],
) -> Result<()> {
    debug_assert!(matches!(ty, InterfaceType::$ty));

    // Double-check that the CM alignment is at least the host's
    // alignment for this type which should be true for all
    // platforms.
    assert!((Self::ALIGN32 as usize) >= mem::align_of::<Self>());

    // Slice `cx`'s memory to the window that we'll be modifying.
    // This should all have already been verified in terms of
    // alignment and sizing meaning that these assertions here are
    // not truly necessary but are instead double-checks.
    //
    // Note that we're casting a `[u8]` slice to `[Self]` with
    // `align_to_mut` which is not safe in general but is safe in
    // our specific case as all `u8` patterns are valid `Self`
    // patterns since `Self` is an integral type.
    let dst = &mut cx.as_slice_mut()[offset..][..items.len() * Self::SIZE32];
    // SAFETY: every bit pattern is a valid value for an integral
    // `Self`, and the assertions immediately below verify that the
    // reinterpreted region is exactly aligned and exactly
    // `items.len()` elements long.
    let (before, middle, end) = unsafe { dst.align_to_mut::<Self>() };
    assert!(before.is_empty() && end.is_empty());
    assert_eq!(middle.len(), items.len());

    // And with all that out of the way perform the copying loop.
    // This is not a `copy_from_slice` because endianness needs to
    // be handled here, but LLVM should pretty easily transform this
    // into a memcpy on little-endian platforms.
    for (dst, src) in middle.iter_mut().zip(items) {
        *dst = src.to_le();
    }
    Ok(())
}
}

unsafe impl Lift for $primitive {
Expand All @@ -709,6 +786,15 @@ macro_rules! integers {
debug_assert!((bytes.as_ptr() as usize) % Self::SIZE32 == 0);
Ok($primitive::from_le_bytes(bytes.try_into().unwrap()))
}

// Specialized bulk lifting for integers: view guest memory directly as
// a little-endian `[Self]` slice and convert each element to native
// endianness (a no-op that LLVM turns into memcpy on LE hosts).
fn load_list(cx: &mut LiftContext<'_>, list: &WasmList<Self>) -> Result<Vec<Self>> {
    let le_items = list._as_le_slice(cx.memory());
    let native: Vec<Self> = le_items.iter().copied().map(Self::from_le).collect();
    Ok(native)
}
}
)*)
}
Expand Down Expand Up @@ -1310,11 +1396,7 @@ where
.checked_mul(elem_size)
.ok_or_else(|| anyhow!("size overflow copying a list"))?;
let ptr = cx.realloc(0, 0, T::ALIGN32, size)?;
let mut cur = ptr;
for item in list {
item.store(cx, ty, cur)?;
cur += elem_size;
}
T::store_list(cx, ty, ptr, list)?;
Ok((ptr, list.len()))
}

Expand Down Expand Up @@ -1454,9 +1536,14 @@ macro_rules! raw_wasm_list_accessors {
/// Panics if the `store` provided is not the one from which this
/// slice originated.
pub fn as_le_slice<'a, T: 'a>(&self, store: impl Into<StoreContext<'a, T>>) -> &'a [$i] {
    // Resolve the backing linear memory from the provided store, then
    // delegate to the memory-taking variant shared with the lifting
    // fast path.
    self._as_le_slice(self.options.memory(store.into().0))
}

fn _as_le_slice<'a>(&self, all_of_memory: &'a [u8]) -> &'a [$i] {
// See comments in `WasmList::get` for the panicking indexing
let byte_size = self.len * mem::size_of::<$i>();
let bytes = &self.options.memory(store.into().0)[self.ptr..][..byte_size];
let bytes = &all_of_memory[self.ptr..][..byte_size];

// The canonical ABI requires that everything is aligned to its
// own size, so this should be an aligned array. Furthermore the
Expand Down

0 comments on commit fb26c56

Please sign in to comment.