[scudo] Add partial chunk heuristic to retrieval algorithm. (llvm#105009)

Previously, the secondary cache retrieval algorithm would not allow
retrieval of memory chunks where the number of unused bytes was
greater than `MaxUnreleasedCachePages * PageSize` bytes. This meant
that even if a memory chunk satisfied the requirements of the
optimal-fit algorithm, it might not be returned. This remains true if
memory tagging is enabled. However, if memory tagging is disabled, a
new heuristic has been put in place: if a memory chunk is a
non-optimal fit, the cache retrieval algorithm will attempt to release
the excess memory to force a cache hit while keeping RSS down.
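
For illustration only, here is a standalone sketch of that eligibility
idea: a cached chunk is usable when the unused bytes at its front fit
within a page budget. The names and signature are hypothetical; the
committed logic lives in `MapAllocatorCache::retrieve()` in the diff
below.

  // Hedged sketch, not the patch itself. Assumes CommitBase is
  // page-aligned, as it is for scudo secondary mappings.
  #include <cstdint>

  bool isEligibleForRetrieval(uint64_t CommitBase, uint64_t CommitSize,
                              uint64_t HeaderPos, uint64_t PageSize,
                              uint64_t MaxAllowedFragmentedPages) {
    // The headers (and the allocation behind them) must land inside the
    // cached mapping.
    if (HeaderPos < CommitBase || HeaderPos > CommitBase + CommitSize)
      return false;
    // Unused bytes at the front of the chunk, counted in whole pages.
    const uint64_t FragmentedBytes =
        (HeaderPos / PageSize) * PageSize - CommitBase;
    return FragmentedBytes <= MaxAllowedFragmentedPages * PageSize;
  }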

If a memory chunk is a non-optimal fit, the retrieval algorithm will
release the excess memory as long as the amount to be released is at
most 4 pages. If the amount of memory to be released exceeds 4 pages,
the retrieval algorithm does not consider that cached memory chunk
valid for retrieval.
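
A condensed, standalone sketch of that release decision follows. The
identifiers are hypothetical; the committed version (see `retrieve()`
below) is additionally gated on `CachedBlock::MaxReleasedCachePages`,
which is temporarily 0 with a TODO to restore it to 4.

  // Hedged sketch: how many bytes a non-optimal fit would release,
  // assuming the 4-page defaults described above.
  #include <algorithm>
  #include <cstdint>

  uint64_t bytesToRelease(uint64_t FragmentedBytes, uint64_t PageSize,
                          uint64_t MaxUnreleasedCachePages = 4,
                          uint64_t MaxReleasedCachePages = 4) {
    const uint64_t MaxUnreleasedBytes = MaxUnreleasedCachePages * PageSize;
    if (FragmentedBytes <= MaxUnreleasedBytes)
      return 0; // close enough already: no release needed
    // Release only the excess, capped at MaxReleasedCachePages and
    // rounded up to whole pages.
    const uint64_t Excess = FragmentedBytes - MaxUnreleasedBytes;
    const uint64_t Capped = std::min(Excess, MaxReleasedCachePages * PageSize);
    return (Capped + PageSize - 1) / PageSize * PageSize;
  }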

This change also addresses an alignment issue in a test case submitted
in llvm#104807.
JoshuaMBa authored and 5c4lar committed Aug 29, 2024
1 parent e2579a8 commit 5f443f0
Showing 2 changed files with 128 additions and 34 deletions.
130 changes: 100 additions & 30 deletions compiler-rt/lib/scudo/standalone/secondary.h
@@ -72,6 +72,15 @@ namespace {
struct CachedBlock {
static constexpr u16 CacheIndexMax = UINT16_MAX;
static constexpr u16 InvalidEntry = CacheIndexMax;
// * MaxReleasedCachePages default is currently 4
// - We arrived at this value after noticing that mapping
// in larger memory regions performs better than releasing
// memory and forcing a cache hit. The data suggest that beyond
// 4 pages, releasing memory takes longer than mapping it, which
// makes the default platform-dependent.
// TODO: set MaxReleasedCachePages back to 4U
static constexpr uptr MaxReleasedCachePages = 0U;

uptr CommitBase = 0;
uptr CommitSize = 0;
@@ -90,8 +99,9 @@ struct CachedBlock {
template <typename Config> class MapAllocatorNoCache {
public:
void init(UNUSED s32 ReleaseToOsInterval) {}
CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment,
UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) {
CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size,
UNUSED uptr Alignment, UNUSED uptr HeadersSize,
UNUSED uptr &EntryHeaderPos) {
return {};
}
void store(UNUSED Options Options, UNUSED uptr CommitBase,
@@ -121,7 +131,7 @@ template <typename Config> class MapAllocatorNoCache {
}
};

static const uptr MaxUnusedCachePages = 4U;
static const uptr MaxUnreleasedCachePages = 4U;

template <typename Config>
bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize,
@@ -151,9 +161,11 @@ bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize,
}
}

const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
if (useMemoryTagging<Config>(Options) && CommitSize > MaxUnusedCacheBytes) {
const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes);
const uptr MaxUnreleasedCacheBytes = MaxUnreleasedCachePages * PageSize;
if (useMemoryTagging<Config>(Options) &&
CommitSize > MaxUnreleasedCacheBytes) {
const uptr UntaggedPos =
Max(AllocPos, CommitBase + MaxUnreleasedCacheBytes);
return MemMap.remap(CommitBase, UntaggedPos - CommitBase, "scudo:secondary",
MAP_MEMTAG | Flags) &&
MemMap.remap(UntaggedPos, CommitBase + CommitSize - UntaggedPos,
@@ -334,61 +346,114 @@ class MapAllocatorCache {
}
}

CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
uptr &EntryHeaderPos) EXCLUDES(Mutex) {
CachedBlock retrieve(uptr MaxAllowedFragmentedPages, uptr Size,
uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
EXCLUDES(Mutex) {
const uptr PageSize = getPageSizeCached();
// 10% of the requested size proved to be the optimal choice for
// retrieving cached blocks after testing several options.
constexpr u32 FragmentedBytesDivisor = 10;
bool Found = false;
CachedBlock Entry;
EntryHeaderPos = 0;
{
ScopedLock L(Mutex);
CallsToRetrieve++;
if (EntriesCount == 0)
return {};
u32 OptimalFitIndex = 0;
u16 RetrievedIndex = CachedBlock::InvalidEntry;
uptr MinDiff = UINTPTR_MAX;
for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;

// Since allocation sizes don't always match cached memory chunk sizes,
// we allow some memory to be unused (called fragmented bytes). The
// amount of unused bytes is exactly EntryHeaderPos - CommitBase.
//
//    CommitBase               CommitBase + CommitSize
//         V                              V
//     +---+------------+-----------------+---+
//     |   |            |                 |   |
//     +---+------------+-----------------+---+
//     ^                ^                     ^
//   Guard       EntryHeaderPos        Guard-page-end
//  page-begin
//
// [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
// well as the header metadata. If EntryHeaderPos - CommitBase exceeds
// MaxAllowedFragmentedPages * PageSize, the cached memory chunk is
// not considered valid for retrieval.
for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
I = Entries[I].Next) {
const uptr CommitBase = Entries[I].CommitBase;
const uptr CommitSize = Entries[I].CommitSize;
const uptr AllocPos =
roundDown(CommitBase + CommitSize - Size, Alignment);
const uptr HeaderPos = AllocPos - HeadersSize;
const uptr MaxAllowedFragmentedBytes =
MaxAllowedFragmentedPages * PageSize;
if (HeaderPos > CommitBase + CommitSize)
continue;
// TODO: Remove AllocPos > CommitBase + MaxAllowedFragmentedBytes
// and replace with Diff > MaxAllowedFragmentedBytes
if (HeaderPos < CommitBase ||
AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
AllocPos > CommitBase + MaxAllowedFragmentedBytes) {
continue;
}
Found = true;
const uptr Diff = HeaderPos - CommitBase;
// immediately use a cached block if it's size is close enough to the
// requested size.
const uptr MaxAllowedFragmentedBytes =
(CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
if (Diff <= MaxAllowedFragmentedBytes) {
OptimalFitIndex = I;
EntryHeaderPos = HeaderPos;
break;
}
// keep track of the smallest cached block

const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;

// Keep track of the smallest cached block
// that is greater than (AllocSize + HeaderSize)
if (Diff > MinDiff)
if (Diff >= MinDiff)
continue;
OptimalFitIndex = I;

MinDiff = Diff;
RetrievedIndex = I;
EntryHeaderPos = HeaderPos;

// Immediately use a cached block if its size is close enough to the
// requested size
const uptr OptimalFitThresholdBytes =
(CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
if (Diff <= OptimalFitThresholdBytes)
break;
}
if (Found) {
Entry = Entries[OptimalFitIndex];
remove(OptimalFitIndex);
if (RetrievedIndex != CachedBlock::InvalidEntry) {
Entry = Entries[RetrievedIndex];
remove(RetrievedIndex);
SuccessfulRetrieves++;
}
}

// The difference between the size of the retrieved memory chunk and
// the requested size is at most MaxAllowedFragmentedPages * PageSize
// bytes
//
// / MaxAllowedFragmentedPages * PageSize  \
// +--------------------------+-------------+
// |                          |             |
// +--------------------------+-------------+
// \ Bytes to be released     /      ^
//                                    |
//                     (may or may not be committed)
//
// The maximum number of bytes released to the OS is capped by
// MaxReleasedCachePages * PageSize.
//
// TODO: Consider making MaxReleasedCachePages configurable since
// the release to OS API can vary across systems.
if (Entry.Time != 0) {
const uptr FragmentedBytes =
roundDown(EntryHeaderPos, PageSize) - Entry.CommitBase;
const uptr MaxUnreleasedCacheBytes = MaxUnreleasedCachePages * PageSize;
if (FragmentedBytes > MaxUnreleasedCacheBytes) {
const uptr MaxReleasedCacheBytes =
CachedBlock::MaxReleasedCachePages * PageSize;
uptr BytesToRelease =
roundUp(Min<uptr>(MaxReleasedCacheBytes,
FragmentedBytes - MaxUnreleasedCacheBytes),
PageSize);
Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease);
}
}

return Entry;
}

@@ -659,8 +724,13 @@ MapAllocator<Config>::tryAllocateFromCache(const Options &Options, uptr Size,
FillContentsMode FillContents) {
CachedBlock Entry;
uptr EntryHeaderPos;
uptr MaxAllowedFragmentedPages = MaxUnreleasedCachePages;

if (UNLIKELY(useMemoryTagging<Config>(Options)))
MaxAllowedFragmentedPages += CachedBlock::MaxReleasedCachePages;

Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos);
Entry = Cache.retrieve(MaxAllowedFragmentedPages, Size, Alignment,
getHeadersSize(), EntryHeaderPos);
if (!Entry.isValid())
return nullptr;

32 changes: 28 additions & 4 deletions compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
@@ -281,8 +281,8 @@ struct MapAllocatorCacheTest : public Test {
std::unique_ptr<CacheT> Cache = std::make_unique<CacheT>();

const scudo::uptr PageSize = scudo::getPageSizeCached();
// The current test allocation size is set to the minimum size
// needed for the scudo allocator to fall back to the secondary allocator
// The current test allocation size is set to the maximum
// cache entry size
static constexpr scudo::uptr TestAllocSize =
CacheConfig::getDefaultMaxEntrySize();

@@ -327,7 +327,7 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
for (scudo::uptr I = CacheConfig::getEntriesArraySize(); I > 0; I--) {
scudo::uptr EntryHeaderPos;
scudo::CachedBlock Entry =
Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos);
Cache->retrieve(0, TestAllocSize, PageSize, 0, EntryHeaderPos);
EXPECT_EQ(Entry.MemMap.getBase(), MemMaps[I - 1].getBase());
}

@@ -336,6 +336,30 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
MemMap.unmap();
}

TEST_F(MapAllocatorCacheTest, PartialChunkHeuristicRetrievalTest) {
const scudo::uptr FragmentedPages =
1 + scudo::CachedBlock::MaxReleasedCachePages;
scudo::uptr EntryHeaderPos;
scudo::CachedBlock Entry;
scudo::MemMapT MemMap = allocate(PageSize + FragmentedPages * PageSize);
Cache->store(Options, MemMap.getBase(), MemMap.getCapacity(),
MemMap.getBase(), MemMap);

// FragmentedPages > MaxAllowedFragmentedPages so PageSize
// cannot be retrieved from the cache
Entry = Cache->retrieve(/*MaxAllowedFragmentedPages=*/0, PageSize, PageSize,
0, EntryHeaderPos);
EXPECT_FALSE(Entry.isValid());

// FragmentedPages == MaxAllowedFragmentedPages so PageSize
// can be retrieved from the cache
Entry =
Cache->retrieve(FragmentedPages, PageSize, PageSize, 0, EntryHeaderPos);
EXPECT_TRUE(Entry.isValid());

MemMap.unmap();
}

TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
std::vector<scudo::MemMapT> MemMaps;
// Fill the cache above MaxEntriesCount to force an eviction
@@ -351,7 +375,7 @@ TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
for (scudo::uptr I = CacheConfig::getDefaultMaxEntriesCount(); I > 0; I--) {
scudo::uptr EntryHeaderPos;
RetrievedEntries.push_back(
Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos));
Cache->retrieve(0, TestAllocSize, PageSize, 0, EntryHeaderPos));
EXPECT_EQ(MemMaps[I].getBase(), RetrievedEntries.back().MemMap.getBase());
}

