protobuf: remove double ptr_ load in arena allocation
Currently the generated code contains two loads of ptr_ because it is an atomic:
one in HasSpace and one in AllocateFromExisting.
Combine these into a single load.

name                           old cpu/op   new cpu/op   delta
BM_ArenaAlloc/100              250ns ± 2%   229ns ± 1%  -8.20%  (p=0.000 n=54+47)
BM_ArenaAllocWithCleanup/100   865ns ± 1%   812ns ± 2%  -6.15%  (p=0.000 n=57+56)

PiperOrigin-RevId: 563698727
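
For context, a minimal standalone sketch of the pattern this change targets (illustrative only; BumpArena, AllocOld, and AllocNew are invented names, not the protobuf sources). With the bump pointer held in a std::atomic, a separate capacity check followed by a separate bump each perform their own relaxed load; folding the check into the allocation keeps the loaded pointer in a local, so only one atomic load is emitted, and the bounds math is done on uintptr_t so no out-of-bounds (or null-based) pointer is ever formed.

#include <atomic>
#include <cstddef>
#include <cstdint>

// Sketch of a bump allocator with an atomic bump pointer; stand-in names only.
struct BumpArena {
  std::atomic<char*> ptr_{nullptr};
  char* limit_ = nullptr;

  // Before: the capacity check and the bump each read ptr_,
  // so the compiler emits two atomic loads.
  void* AllocOld(size_t n) {
    char* p = ptr_.load(std::memory_order_relaxed);    // load #1 (the HasSpace-style check)
    if (n > static_cast<size_t>(limit_ - p)) return nullptr;
    char* ret = ptr_.load(std::memory_order_relaxed);  // load #2 (the bump)
    ptr_.store(ret + n, std::memory_order_relaxed);
    return ret;
  }

  // After: a single load into a local; the bounds check is done on uintptr_t
  // so that ret + n never forms an out-of-bounds pointer.
  void* AllocNew(size_t n) {
    char* ret = ptr_.load(std::memory_order_relaxed);  // single load
    uintptr_t next = reinterpret_cast<uintptr_t>(ret) + n;
    if (next > reinterpret_cast<uintptr_t>(limit_)) return nullptr;
    ptr_.store(reinterpret_cast<char*>(next), std::memory_order_relaxed);
    return ret;
  }
};
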
protobuf-github-bot authored and copybara-github committed Sep 8, 2023
1 parent 7dabac9 commit de87585
Showing 2 changed files with 34 additions and 33 deletions.
7 changes: 5 additions & 2 deletions src/google/protobuf/arena.cc
@@ -217,7 +217,10 @@ SizedPtr SerialArena::Free(Deallocator deallocator) {
 PROTOBUF_NOINLINE
 void* SerialArena::AllocateAlignedFallback(size_t n) {
   AllocateNewBlock(n);
-  return AllocateFromExisting(n);
+  void* ret;
+  bool res = MaybeAllocateAligned(n, &ret);
+  ABSL_DCHECK(res);
+  return ret;
 }
 
 PROTOBUF_NOINLINE
@@ -250,7 +253,7 @@ void* SerialArena::AllocateAlignedWithCleanupFallback(
     size_t n, size_t align, void (*destructor)(void*)) {
   size_t required = AlignUpTo(n, align) + cleanup::Size(destructor);
   AllocateNewBlock(required);
-  return AllocateFromExistingWithCleanupFallback(n, align, destructor);
+  return AllocateAlignedWithCleanup(n, align, destructor);
 }
 
 PROTOBUF_NOINLINE
60 changes: 29 additions & 31 deletions src/google/protobuf/serial_arena.h
@@ -35,6 +35,7 @@
 
 #include <algorithm>
 #include <atomic>
+#include <cstdint>
 #include <string>
 #include <type_traits>
 #include <typeinfo>
@@ -122,10 +123,6 @@ class PROTOBUF_EXPORT SerialArena {
   }
   uint64_t SpaceUsed() const;
 
-  bool HasSpace(size_t n) const {
-    return n <= static_cast<size_t>(limit_ - ptr());
-  }
-
   // See comments on `cached_blocks_` member for details.
   PROTOBUF_ALWAYS_INLINE void* TryAllocateFromCachedBlock(size_t size) {
     if (PROTOBUF_PREDICT_FALSE(size < 16)) return nullptr;
@@ -161,10 +158,11 @@
       }
     }
 
-    if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) {
-      return AllocateAlignedFallback(n);
+    void* ptr;
+    if (PROTOBUF_PREDICT_TRUE(MaybeAllocateAligned(n, &ptr))) {
+      return ptr;
     }
-    return AllocateFromExisting(n);
+    return AllocateAlignedFallback(n);
   }
 
  private:
@@ -184,13 +182,6 @@
                : ArenaAlignAs(a).CeilDefaultAligned(p);
   }
 
-  void* AllocateFromExisting(size_t n) {
-    PROTOBUF_UNPOISON_MEMORY_REGION(ptr(), n);
-    void* ret = ptr();
-    set_ptr(static_cast<char*>(ret) + n);
-    return ret;
-  }
-
   // See comments on `cached_blocks_` member for details.
   void ReturnArrayMemory(void* p, size_t size) {
     // We only need to check for 32-bit platforms.
@@ -246,8 +237,17 @@ class PROTOBUF_EXPORT SerialArena {
   bool MaybeAllocateAligned(size_t n, void** out) {
     ABSL_DCHECK(internal::ArenaAlignDefault::IsAligned(n));
     ABSL_DCHECK_GE(limit_, ptr());
-    if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) return false;
-    *out = AllocateFromExisting(n);
+    char* ret = ptr();
+    // ret + n may point out of the block bounds, or ret may be nullptr.
+    // Both computations have undefined behavior when done on pointers,
+    // so do them on uintptr_t instead.
+    uintptr_t next = reinterpret_cast<uintptr_t>(ret) + n;
+    if (PROTOBUF_PREDICT_FALSE(next > reinterpret_cast<uintptr_t>(limit_))) {
+      return false;
+    }
+    PROTOBUF_UNPOISON_MEMORY_REGION(ret, n);
+    *out = ret;
+    set_ptr(reinterpret_cast<char*>(next));
     return true;
   }
 
@@ -262,17 +262,26 @@ class PROTOBUF_EXPORT SerialArena {
   PROTOBUF_ALWAYS_INLINE
   void* AllocateAlignedWithCleanup(size_t n, size_t align,
                                    void (*destructor)(void*)) {
-    size_t required = AlignUpTo(n, align) + cleanup::Size(destructor);
-    if (PROTOBUF_PREDICT_FALSE(!HasSpace(required))) {
+    n = ArenaAlignDefault::Ceil(n);
+    char* ret = ArenaAlignAs(align).CeilDefaultAligned(ptr());
+    // See the comment in MaybeAllocateAligned re uintptr_t.
+    uintptr_t next = reinterpret_cast<uintptr_t>(ret) + n;
+    if (PROTOBUF_PREDICT_FALSE(next + cleanup::Size(destructor) >
+                               reinterpret_cast<uintptr_t>(limit_))) {
       return AllocateAlignedWithCleanupFallback(n, align, destructor);
     }
-    return AllocateFromExistingWithCleanupFallback(n, align, destructor);
+    PROTOBUF_UNPOISON_MEMORY_REGION(ret, n);
+    set_ptr(reinterpret_cast<char*>(next));
+    AddCleanupFromExisting(ret, destructor);
+    ABSL_DCHECK_GE(limit_, ptr());
+    return ret;
   }
 
   PROTOBUF_ALWAYS_INLINE
   void AddCleanup(void* elem, void (*destructor)(void*)) {
     size_t required = cleanup::Size(destructor);
-    if (PROTOBUF_PREDICT_FALSE(!HasSpace(required))) {
+    size_t has = static_cast<size_t>(limit_ - ptr());
+    if (PROTOBUF_PREDICT_FALSE(required > has)) {
       return AddCleanupFallback(elem, destructor);
     }
     AddCleanupFromExisting(elem, destructor);
@@ -286,17 +295,6 @@ class PROTOBUF_EXPORT SerialArena {
   bool MaybeAllocateString(void*& p);
   ABSL_ATTRIBUTE_RETURNS_NONNULL void* AllocateFromStringBlockFallback();
 
-  void* AllocateFromExistingWithCleanupFallback(size_t n, size_t align,
-                                                void (*destructor)(void*)) {
-    n = AlignUpTo(n, align);
-    PROTOBUF_UNPOISON_MEMORY_REGION(ptr(), n);
-    void* ret = ArenaAlignAs(align).CeilDefaultAligned(ptr());
-    set_ptr(ptr() + n);
-    ABSL_DCHECK_GE(limit_, ptr());
-    AddCleanupFromExisting(ret, destructor);
-    return ret;
-  }
-
   PROTOBUF_ALWAYS_INLINE
   void AddCleanupFromExisting(void* elem, void (*destructor)(void*)) {
     cleanup::Tag tag = cleanup::Type(destructor);
