From 2f494240f8e9b6c01192a763da48db76f57a974e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 28 Jan 2022 16:10:05 -0600 Subject: [PATCH] Lazily allocate the bump-alloc chunk in the externref table (#3739) This commit updates the allocation of a `VMExternRefActivationsTable` structure to perform zero malloc memory allocations. Previously it would allocate a page-sized `chunk` plus some space in hash sets for future insertions. The main trick implemented here is that the fast bump-allocation chunk is lazily allocated and configured after the first gc on the slow path. The motivation for this PR is that given our recent work to further refine and optimize the instantiation process this allocation started to show up in a nontrivial fashion. Most modules today never touch this table anyway as almost none of them use reference types, so the time spent allocating and deallocating the table per-store was largely wasted time. Concretely on a microbenchmark this PR speeds up instantiation of a module with one function by 30%, decreasing the instantiation cost from 1.8us to 1.2us. Overall a pretty minor win but when the instantiation times we're measuring start being in the single-digit microseconds this win ends up getting magnified! --- crates/runtime/src/externref.rs | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/crates/runtime/src/externref.rs b/crates/runtime/src/externref.rs index 573dd4b24f5a..6dd59ad747af 100644 --- a/crates/runtime/src/externref.rs +++ b/crates/runtime/src/externref.rs @@ -566,8 +566,13 @@ impl VMExternRefActivationsTable { /// Create a new `VMExternRefActivationsTable`. pub fn new() -> Self { - let mut chunk = Self::new_chunk(Self::CHUNK_SIZE); - let next = chunk.as_mut_ptr().cast::(); + // Start with an empty chunk in case this activations table isn't used. + // This means that there's no space in the bump-allocation area which + // will force any path trying to use this to the slow gc path. 
The first + // time this happens, though, the slow gc path will allocate a new chunk + // for actual fast-bumping. + let mut chunk: Box<[TableElem]> = Box::new([]); + let next = chunk.as_mut_ptr(); let end = unsafe { next.add(chunk.len()) }; VMExternRefActivationsTable { @@ -576,8 +581,8 @@ impl VMExternRefActivationsTable { end: NonNull::new(end).unwrap(), chunk, }, - over_approximated_stack_roots: HashSet::with_capacity(Self::CHUNK_SIZE), - precise_stack_roots: HashSet::with_capacity(Self::CHUNK_SIZE), + over_approximated_stack_roots: HashSet::new(), + precise_stack_roots: HashSet::new(), stack_canary: None, #[cfg(debug_assertions)] gc_okay: true, @@ -728,9 +733,18 @@ impl VMExternRefActivationsTable { "after sweeping the bump chunk, all slots should be `None`" ); + // If this is the first instance of gc then the initial chunk is empty, + // so we lazily allocate space for fast bump-allocation in the future. + if self.alloc.chunk.is_empty() { + self.alloc.chunk = Self::new_chunk(Self::CHUNK_SIZE); + self.alloc.end = + NonNull::new(unsafe { self.alloc.chunk.as_mut_ptr().add(self.alloc.chunk.len()) }) + .unwrap(); + } + // Reset our `next` finger to the start of the bump allocation chunk. unsafe { - let next = self.alloc.chunk.as_mut_ptr().cast::(); + let next = self.alloc.chunk.as_mut_ptr(); debug_assert!(!next.is_null()); *self.alloc.next.get() = NonNull::new_unchecked(next); }