From 8d5670e72bb930f18c5d1d4262caa80cae0be03a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Mon, 15 Jul 2024 00:37:38 +0900 Subject: [PATCH] refactor(allocator): Use `&` instead of a thread-local (#9235) **Description:** This is a part of https://github.com/swc-project/swc/pull/9230. I profiled the performance, and `thread_local` took too long to get the address of the thread-local variable. So, I inlined the reference into the allocator. # Benchmark result ``` Gnuplot not found, using plotters backend common/allocator/alloc/std/1000000 time: [4.9478 ms 4.9653 ms 4.9922 ms] Found 17 outliers among 100 measurements (17.00%) 4 (4.00%) high mild 13 (13.00%) high severe common/allocator/alloc/no-scope/1000000 time: [5.4821 ms 5.4938 ms 5.5068 ms] Found 17 outliers among 100 measurements (17.00%) 2 (2.00%) high mild 15 (15.00%) high severe common/allocator/alloc/scoped/1000000 time: [3.1401 ms 3.1456 ms 3.1518 ms] Found 12 outliers among 100 measurements (12.00%) 3 (3.00%) high mild 9 (9.00%) high severe common/allocator/alloc/cached-no-scope/1000000 time: [5.0992 ms 5.1090 ms 5.1198 ms] Found 11 outliers among 100 measurements (11.00%) 2 (2.00%) high mild 9 (9.00%) high severe common/allocator/alloc/cached-scoped/1000000 time: [3.0191 ms 3.0230 ms 3.0273 ms] Found 11 outliers among 100 measurements (11.00%) 2 (2.00%) low mild 1 (1.00%) high mild 8 (8.00%) high severe ``` --- Cargo.lock | 5 +- crates/swc_allocator/Cargo.toml | 1 + crates/swc_allocator/benches/bench.rs | 6 +- crates/swc_allocator/src/alloc.rs | 109 +++++++++++++------------- crates/swc_allocator/src/boxed/mod.rs | 16 ++-- crates/swc_allocator/src/lib.rs | 28 +------ crates/swc_allocator/src/vec/mod.rs | 22 +++--- crates/swc_allocator/tests/apis.rs | 4 +- 8 files changed, 86 insertions(+), 105 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a6dfd085913..3ad85d15603f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3710,6 +3710,7 @@ dependencies = [ "serde", "serde_derive", "swc_malloc", + "triomphe", ] [[package]] @@ -5876,9 +5877,9 @@ dependencies = [ [[package]] name = "triomphe" -version = "0.1.11" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" +checksum = "e6631e42e10b40c0690bf92f404ebcfe6e1fdb480391d15f17cc8e96eeed5369" dependencies = [ "serde", "stable_deref_trait", diff --git a/crates/swc_allocator/Cargo.toml b/crates/swc_allocator/Cargo.toml index c99011823e67..54182624dd86 100644 --- a/crates/swc_allocator/Cargo.toml +++ b/crates/swc_allocator/Cargo.toml @@ -24,6 +24,7 @@ rkyv = { workspace = true, optional = true } scoped-tls = { workspace = true } serde = { workspace = true, optional = true } serde_derive = { workspace = true, optional = true } +triomphe = "0.1.13" [dev-dependencies] diff --git a/crates/swc_allocator/benches/bench.rs b/crates/swc_allocator/benches/bench.rs index 6b27b4bf26d7..55deaef17ab0 100644 --- a/crates/swc_allocator/benches/bench.rs +++ b/crates/swc_allocator/benches/bench.rs @@ -1,7 +1,7 @@ extern crate swc_malloc; use codspeed_criterion_compat::{black_box, criterion_group, criterion_main, Bencher, Criterion}; -use swc_allocator::{FastAlloc, MemorySpace}; +use swc_allocator::{FastAlloc, SwcAllocator}; fn bench_alloc(c: &mut Criterion) { fn direct_alloc_std(b: &mut Bencher, times: usize) { @@ -40,7 +40,7 @@ fn bench_alloc(c: &mut Criterion) { fn direct_alloc_scoped(b: &mut Bencher, times: usize) { b.iter(|| { - let allocator = MemorySpace::default(); + let allocator = SwcAllocator::default(); allocator.scope(|| { let mut vec = swc_allocator::vec::Vec::new(); @@ -56,7 +56,7 @@ fn bench_alloc(c: &mut Criterion) { fn fast_alloc_scoped(b: &mut Bencher, times: usize) { b.iter(|| { - MemorySpace::default().scope(|| { + SwcAllocator::default().scope(|| { let allocator = FastAlloc::default(); let mut vec = allocator.vec(); diff --git a/crates/swc_allocator/src/alloc.rs b/crates/swc_allocator/src/alloc.rs index 7d109734bdd8..29e3b1e1153c 100644 --- a/crates/swc_allocator/src/alloc.rs +++ b/crates/swc_allocator/src/alloc.rs @@ -1,44 +1,51 @@ -use std::{alloc::Layout, ptr::NonNull}; +use std::{alloc::Layout, mem::transmute, ptr::NonNull}; use allocator_api2::alloc::Global; use scoped_tls::scoped_thread_local; use crate::{FastAlloc, MemorySpace}; -scoped_thread_local!(pub(crate) static ALLOC: MemorySpace); +scoped_thread_local!(pub(crate) static ALLOC: &'static SwcAllocator); -#[derive(Debug, Clone, Copy)] -pub struct SwcAlloc { - pub(crate) is_arena_mode: bool, -} +#[derive(Default)] +pub struct SwcAllocator(MemorySpace); -impl Default for FastAlloc { - fn default() -> Self { - Self { - is_arena_mode: ALLOC.is_set(), - } +impl SwcAllocator { + /// Invokes `f` in a scope where the allocations are done in this allocator. + #[inline(always)] + pub fn scope<'a, F, R>(&'a self, f: F) -> R + where + F: FnOnce() -> R, + { + let s = unsafe { + // Safery: We are using a scoped API + transmute::<&'a SwcAllocator, &'static SwcAllocator>(self) + }; + + ALLOC.set(&s, f) } } -impl Default for SwcAlloc { +impl Default for FastAlloc { fn default() -> Self { - SwcAlloc { - is_arena_mode: ALLOC.is_set(), + Self { + alloc: if ALLOC.is_set() { + Some(ALLOC.with(|v| *v)) + } else { + None + }, } } } -impl SwcAlloc { +impl FastAlloc { /// `true` is passed to `f` if the box is allocated with a custom allocator. fn with_allocator( &self, f: impl FnOnce(&dyn allocator_api2::alloc::Allocator, bool) -> T, ) -> T { - if self.is_arena_mode { - ALLOC.with(|a| { - // - f(&&**a as &dyn allocator_api2::alloc::Allocator, true) - }) + if let Some(arena) = &self.alloc { + f((&&*arena.0) as &dyn allocator_api2::alloc::Allocator, true) } else { f(&allocator_api2::alloc::Global, false) } @@ -49,7 +56,7 @@ fn mark_ptr_as_arena_mode(ptr: NonNull<[u8]>) -> NonNull<[u8]> { ptr } -unsafe impl allocator_api2::alloc::Allocator for SwcAlloc { +unsafe impl allocator_api2::alloc::Allocator for FastAlloc { fn allocate(&self, layout: Layout) -> Result, allocator_api2::alloc::AllocError> { self.with_allocator(|a, is_arena_mode| { let ptr = a.allocate(layout)?; @@ -78,18 +85,13 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc { } unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { - if self.is_arena_mode { + if self.alloc.is_some() { debug_assert!( ALLOC.is_set(), "Deallocating a pointer allocated with arena mode with a non-arena mode allocator" ); - ALLOC.with(|alloc| { - unsafe { - // Safety: We are in unsafe fn - (&**alloc).deallocate(ptr, layout) - } - }) + self.with_allocator(|alloc, _| alloc.deallocate(ptr, layout)) } else { Global.deallocate(ptr, layout) } @@ -101,16 +103,15 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc { old_layout: Layout, new_layout: Layout, ) -> Result, allocator_api2::alloc::AllocError> { - if self.is_arena_mode { - debug_assert!( - ALLOC.is_set(), - "Growing a pointer allocated with arena mode with a non-arena mode allocator" - ); + self.with_allocator(|alloc, is_arena_mode| { + let ptr = alloc.grow(ptr, old_layout, new_layout)?; - ALLOC.with(|alloc| (&**alloc).grow(ptr, old_layout, new_layout)) - } else { - Global.grow(ptr, old_layout, new_layout) - } + if is_arena_mode { + Ok(mark_ptr_as_arena_mode(ptr)) + } else { + Ok(ptr) + } + }) } unsafe fn grow_zeroed( @@ -119,16 +120,15 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc { old_layout: Layout, new_layout: Layout, ) -> Result, allocator_api2::alloc::AllocError> { - if self.is_arena_mode { - debug_assert!( - ALLOC.is_set(), - "Growing a pointer allocated with arena mode with a non-arena mode allocator" - ); + self.with_allocator(|alloc, is_arena_mode| { + let ptr = alloc.grow_zeroed(ptr, old_layout, new_layout)?; - ALLOC.with(|alloc| (&**alloc).grow_zeroed(ptr, old_layout, new_layout)) - } else { - Global.grow_zeroed(ptr, old_layout, new_layout) - } + if is_arena_mode { + Ok(mark_ptr_as_arena_mode(ptr)) + } else { + Ok(ptr) + } + }) } unsafe fn shrink( @@ -137,16 +137,15 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc { old_layout: Layout, new_layout: Layout, ) -> Result, allocator_api2::alloc::AllocError> { - if self.is_arena_mode { - debug_assert!( - ALLOC.is_set(), - "Shrinking a pointer allocated with arena mode with a non-arena mode allocator" - ); + self.with_allocator(|alloc, is_arena_mode| { + let ptr = alloc.shrink(ptr, old_layout, new_layout)?; - ALLOC.with(|alloc| (&**alloc).shrink(ptr, old_layout, new_layout)) - } else { - Global.shrink(ptr, old_layout, new_layout) - } + if is_arena_mode { + Ok(mark_ptr_as_arena_mode(ptr)) + } else { + Ok(ptr) + } + }) } fn by_ref(&self) -> &Self diff --git a/crates/swc_allocator/src/boxed/mod.rs b/crates/swc_allocator/src/boxed/mod.rs index 2748adb6caba..8ea7fd38c2de 100644 --- a/crates/swc_allocator/src/boxed/mod.rs +++ b/crates/swc_allocator/src/boxed/mod.rs @@ -7,7 +7,7 @@ use std::{ pin::Pin, }; -use crate::{alloc::SwcAlloc, FastAlloc}; +use crate::FastAlloc; #[cfg(feature = "rkyv")] mod rkyv; @@ -23,7 +23,7 @@ mod serde; /// The last bit is 1 if the box is allocated with a custom allocator. #[repr(transparent)] #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Box(pub(crate) allocator_api2::boxed::Box); +pub struct Box(pub(crate) allocator_api2::boxed::Box); impl From for Box { #[inline(always)] @@ -32,9 +32,9 @@ impl From for Box { } } -impl From> for Box { +impl From> for Box { #[inline(always)] - fn from(v: allocator_api2::boxed::Box) -> Self { + fn from(v: allocator_api2::boxed::Box) -> Self { Box(v) } } @@ -56,7 +56,7 @@ impl Box { pub fn new(value: T) -> Self { Self(allocator_api2::boxed::Box::new_in( value, - SwcAlloc::default(), + FastAlloc::default(), )) } @@ -111,7 +111,7 @@ impl Box { pub unsafe fn from_raw(raw: *mut T) -> Self { Self(allocator_api2::boxed::Box::from_raw_in( raw, - SwcAlloc::default(), + FastAlloc::default(), )) } @@ -629,7 +629,7 @@ where } impl FastAlloc { - pub fn alloc(self, t: T) -> Box { - Box(allocator_api2::boxed::Box::new_in(t, self.swc_alloc())) + pub fn alloc(&self, t: T) -> Box { + Box(allocator_api2::boxed::Box::new_in(t, self.clone())) } } diff --git a/crates/swc_allocator/src/lib.rs b/crates/swc_allocator/src/lib.rs index f45ba6568c12..3066fdd088f7 100644 --- a/crates/swc_allocator/src/lib.rs +++ b/crates/swc_allocator/src/lib.rs @@ -4,46 +4,26 @@ #![allow(clippy::needless_doctest_main)] -use alloc::SwcAlloc; use std::ops::{Deref, DerefMut}; use bumpalo::Bump; -use crate::alloc::ALLOC; +pub use crate::alloc::SwcAllocator; mod alloc; pub mod boxed; pub mod vec; -#[derive(Debug, Clone, Copy)] +#[derive(Clone)] pub struct FastAlloc { - is_arena_mode: bool, -} - -impl FastAlloc { - fn swc_alloc(self) -> SwcAlloc { - SwcAlloc { - is_arena_mode: self.is_arena_mode, - } - } + alloc: Option<&'static SwcAllocator>, } #[derive(Default)] -pub struct MemorySpace { +struct MemorySpace { alloc: Bump, } -impl MemorySpace { - /// Invokes `f` in a scope where the allocations are done in this allocator. - #[inline(always)] - pub fn scope(&self, f: F) -> R - where - F: FnOnce() -> R, - { - ALLOC.set(self, f) - } -} - impl From for MemorySpace { fn from(alloc: Bump) -> Self { Self { alloc } diff --git a/crates/swc_allocator/src/vec/mod.rs b/crates/swc_allocator/src/vec/mod.rs index 3ca69557b399..d6297df85b08 100644 --- a/crates/swc_allocator/src/vec/mod.rs +++ b/crates/swc_allocator/src/vec/mod.rs @@ -3,7 +3,7 @@ use std::ops::{Deref, DerefMut}; #[cfg(feature = "rkyv")] mod rkyv; -use crate::{alloc::SwcAlloc, boxed::Box, FastAlloc}; +use crate::{boxed::Box, FastAlloc}; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] @@ -11,7 +11,7 @@ use crate::{alloc::SwcAlloc, boxed::Box, FastAlloc}; feature = "serde", derive(serde_derive::Serialize, serde_derive::Deserialize) )] -pub struct Vec(allocator_api2::vec::Vec); +pub struct Vec(allocator_api2::vec::Vec); impl Vec { pub fn new() -> Self { @@ -21,7 +21,7 @@ impl Vec { pub fn with_capacity(capacity: usize) -> Self { Self(allocator_api2::vec::Vec::with_capacity_in( capacity, - SwcAlloc::default(), + FastAlloc::default(), )) } @@ -167,13 +167,13 @@ impl Vec { ptr, length, capacity, - SwcAlloc::default(), + FastAlloc::default(), )) } } impl Deref for Vec { - type Target = allocator_api2::vec::Vec; + type Target = allocator_api2::vec::Vec; fn deref(&self) -> &Self::Target { &self.0 @@ -188,12 +188,12 @@ impl DerefMut for Vec { impl Default for Vec { fn default() -> Self { - Self(allocator_api2::vec::Vec::new_in(SwcAlloc::default())) + Self(allocator_api2::vec::Vec::new_in(FastAlloc::default())) } } impl IntoIterator for Vec { - type IntoIter = allocator_api2::vec::IntoIter; + type IntoIter = allocator_api2::vec::IntoIter; type Item = T; fn into_iter(self) -> Self::IntoIter { @@ -245,14 +245,14 @@ impl Extend for Vec { } impl FastAlloc { - pub fn vec(self) -> Vec { - Vec(allocator_api2::vec::Vec::new_in(self.swc_alloc())) + pub fn vec(&self) -> Vec { + Vec(allocator_api2::vec::Vec::new_in(self.clone())) } - pub fn vec_with_capacity(self, capacity: usize) -> Vec { + pub fn vec_with_capacity(&self, capacity: usize) -> Vec { Vec(allocator_api2::vec::Vec::with_capacity_in( capacity, - self.swc_alloc(), + self.clone(), )) } } diff --git a/crates/swc_allocator/tests/apis.rs b/crates/swc_allocator/tests/apis.rs index 5a796d9ad4cd..a613046b0fbe 100644 --- a/crates/swc_allocator/tests/apis.rs +++ b/crates/swc_allocator/tests/apis.rs @@ -1,5 +1,5 @@ use criterion::black_box; -use swc_allocator::MemorySpace; +use swc_allocator::SwcAllocator; #[test] fn direct_alloc_std() { @@ -22,7 +22,7 @@ fn direct_alloc_no_scope() { #[test] fn direct_alloc_in_scope() { - let allocator = MemorySpace::default(); + let allocator = SwcAllocator::default(); allocator.scope(|| { let mut vec = swc_allocator::vec::Vec::new();