Skip to content

Commit

Permalink
Constant Time Lookup NibbleMap (#108939)
Browse files Browse the repository at this point in the history
- Changes NibbleMap implementation to have O(1) lookup and O(n) write/delete
- Increments ExecutionManager contract to version 2 which supports the new implementation
  • Loading branch information
max-charlamb authored Nov 11, 2024
1 parent 470afc6 commit 4087cc8
Show file tree
Hide file tree
Showing 8 changed files with 281 additions and 85 deletions.
68 changes: 38 additions & 30 deletions src/coreclr/debug/daccess/fntableaccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,23 +44,32 @@

static NTSTATUS OutOfProcessFindHeader(ReadMemoryFunction fpReadMemory,PVOID pUserContext, DWORD_PTR pMapIn, DWORD_PTR addr, DWORD_PTR &codeHead)
{
using namespace NibbleMap;
codeHead = 0;

DWORD dword;
DWORD tmp; // must be a DWORD, not a DWORD_PTR
DWORD_PTR startPos = ADDR2POS(addr); // align to 128 byte buckets ( == index into the array of nibbles)
DWORD_PTR startPos = ADDR2POS(addr); // align to 32 byte buckets ( == index into the array of nibbles)
DWORD_PTR offset = ADDR2OFFS(addr); // this is the offset inside the bucket + 1
DWORD * pMap = (DWORD *) pMapIn; // make this a pointer type so our pointer math is correct w/o adding sizeof(DWORD) everywhere

_ASSERTE(offset == (offset & NIBBLE_MASK)); // the offset must fit in a nibble

pMap += (startPos >> LOG2_NIBBLES_PER_DWORD); // points to the proper DWORD of the map

//
// get DWORD and shift down our nibble
//
move(tmp, pMap);
tmp = tmp >> POS2SHIFTCOUNT(startPos);
// #1 look up DWORD representing current PC
move(dword, pMap);

// #2 if DWORD is a pointer, then we can return
if (IsPointer(dword))
{
codeHead = DecodePointer(dword) - sizeof(CodeHeader);
return STATUS_SUCCESS;
}

tmp = dword >> POS2SHIFTCOUNT(startPos);

// #3 check if corresponding nibble is initialized and points to an equal or earlier address
// don't allow equality in the next check (tmp & NIBBLE_MASK == offset)
// there are code blocks that terminate with a call instruction
// (like call throwobject), i.e. their return address is
Expand All @@ -76,9 +85,8 @@ static NTSTATUS OutOfProcessFindHeader(ReadMemoryFunction fpReadMemory,PVOID pUs
return STATUS_SUCCESS;
}

// is there a header in the remainder of the DWORD ?
// #4 try to find preceding nibble in the DWORD
tmp = tmp >> NIBBLE_SIZE;

if (tmp)
{
startPos--;
Expand All @@ -92,40 +100,40 @@ static NTSTATUS OutOfProcessFindHeader(ReadMemoryFunction fpReadMemory,PVOID pUs
return STATUS_SUCCESS;
}

// we skipped the remainder of the DWORD,
// #5.1 read previous DWORD
// We skipped the remainder of the DWORD,
// so we must set startPos to the highest position of
// previous DWORD

startPos = ((startPos >> LOG2_NIBBLES_PER_DWORD) << LOG2_NIBBLES_PER_DWORD) - 1;

if ((INT_PTR)startPos < 0)
// previous DWORD, unless we are already on the first DWORD
if (startPos < NIBBLES_PER_DWORD)
{
return STATUS_SUCCESS;
return 0;
}

// skip "headerless" DWORDS

startPos = ((startPos >> LOG2_NIBBLES_PER_DWORD) << LOG2_NIBBLES_PER_DWORD) - 1;
pMap--;
move(tmp, pMap);
while (!tmp)
move(dword, pMap);

// If the second dword is not empty, it either has a nibble or a pointer
if (dword)
{
startPos -= NIBBLES_PER_DWORD;
if ((INT_PTR)startPos < 0)
// #5.2 either DWORD is a pointer
if (IsPointer(dword))
{
codeHead = DecodePointer(dword) - sizeof(CodeHeader);
return STATUS_SUCCESS;
}
pMap--;
move (tmp, pMap);
}


while (!(tmp & NIBBLE_MASK))
{
tmp = tmp >> NIBBLE_SIZE;
startPos--;
// #5.4 or contains a nibble
tmp = dword;
while(!(tmp & NIBBLE_MASK))
{
tmp >>= NIBBLE_SIZE;
startPos--;
}
codeHead = POSOFF2ADDR(startPos, tmp & NIBBLE_MASK) - sizeof(CodeHeader);
return STATUS_SUCCESS;
}

codeHead = POSOFF2ADDR(startPos, tmp & NIBBLE_MASK) - sizeof(CodeHeader);
return STATUS_SUCCESS;
}

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/debug/runtimeinfo/contracts.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"DacStreams": 1,
"EcmaMetadata" : 1,
"Exception": 1,
"ExecutionManager": 1,
"ExecutionManager": 2,
"Loader": 1,
"Object": 1,
"PlatformMetadata": 1,
Expand Down
88 changes: 84 additions & 4 deletions src/coreclr/inc/nibblemapmacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,71 @@
// Because we cannot guarantee that jitblocks always start at
// multiples of 32 bytes we cannot use a simple bitmap; instead we
// use a nibble (4 bit) per bucket and encode the offset of the header
// inside the bucket (in DWORDS). In order to make initialization
// inside the bucket (in DWORDS). Each 32-bit DWORD represents 256 bytes
// in the mapped code region. In order to make initialization
// easier we add one to the real offset, a nibble-value of zero
// means that there is no header start in the resp. bucket.
// In order to speed up "backwards scanning" we start numbering
// nibbles inside a DWORD from the highest bits (28..31). Because
// of that we can scan backwards inside the DWORD with right shifts.
//
// To have constant time reads, we store pointers relative to the map
// base in DWORDs which represent code regions that are completely
// covered by a function. A DWORD is a pointer if the nibble in its
// lowest 4 bits has a value > 8.
//
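// Pointers are encoded in DWORDs by storing the top 28 bits of the
// relative pointer unchanged. The bottom 4 bits hold a nibble whose value
// is 9 plus bits 2..3 of the pointer (i.e. 9 to 12). That value is always
// greater than 8, which identifies the DWORD as a pointer, and it carries
// the final 2 bits of information. Bits 0..1 of the pointer are not stored.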
//
///////////////////////////////////////////////////////////////////////
//// Set Algorithm
///////////////////////////////////////////////////////////////////////
//
// 1. Write encoded nibble at offset corresponding to PC.
// 2. If codeSize completely covers one or more subsequent DWORDs,
// insert relative pointers into each covered DWORD.
//
///////////////////////////////////////////////////////////////////////
//// Delete Algorithm
///////////////////////////////////////////////////////////////////////
//
// 1. Delete the nibble corresponding to the PC.
// 2. Delete all following pointers which match the offset of PC.
// We must check that the pointers refer to the PC because there may be
// one or more subsequent nibbles in the DWORD. In that case the
// following pointers would not refer to PC but a different offset.
//
///////////////////////////////////////////////////////////////////////
//// Read Algorithm
///////////////////////////////////////////////////////////////////////
//
// 1. Look up DWORD representing given PC.
// 2. If DWORD is a pointer, then return pointer + mapBase.
// 3. If nibble corresponding to PC is initialized and the value
// it represents precedes the PC return that value.
// 4. Find the first preceding initialized nibble in the DWORD.
// If found, return the value the nibble represents.
// 5. Execute steps 2 and 4 on the preceding DWORD.
// If this DWORD does not contain a pointer or any initialized nibbles,
// then we must not be in a function and can return a nullptr.
//
///////////////////////////////////////////////////////////////////////
//// Concurrency
///////////////////////////////////////////////////////////////////////
//
// Writes to the nibblemap (set and delete) require holding a critical
// section and therefore can not be done concurrently. Reads can be done
// without a lock and can occur at any time.
//
// The read algorithm is designed so that as long as no tearing occurs
// on a DWORD, the read will always be valid. This is because the result
// of a read depends on only a single DWORD: either the first DWORD, if it
// contains a pointer or an initialized nibble at or before the PC, or
// otherwise the second (preceding) DWORD, if it contains a pointer or a
// nibble. Given that DWORDs are 32 bits wide and aligned to 4-byte
// boundaries, these reads should not tear.
//

#if defined(HOST_64BIT)
// TODO: bump up the windows CODE_ALIGN to 16 and iron out any nibble map bugs that exist.
Expand All @@ -32,23 +91,44 @@
# define CODE_ALIGN sizeof(DWORD) // 4 byte boundry
# define LOG2_CODE_ALIGN 2
#endif
#define NIBBLE_MASK 0xf
#define NIBBLE_MASK 0xfu
#define NIBBLE_SIZE 4 // 4 bits
#define LOG2_NIBBLE_SIZE 2
#define NIBBLES_PER_DWORD ((8*sizeof(DWORD)) >> LOG2_NIBBLE_SIZE) // 8 (4-bit) nibbles per dword
#define NIBBLES_PER_DWORD (2 * sizeof(DWORD)) // 8 (4-bit) nibbles per dword
#define NIBBLES_PER_DWORD_MASK (NIBBLES_PER_DWORD - 1) // 7
#define LOG2_NIBBLES_PER_DWORD 3
#define BYTES_PER_BUCKET (NIBBLES_PER_DWORD * CODE_ALIGN) // 32 bytes per bucket
#define LOG2_BYTES_PER_BUCKET (LOG2_CODE_ALIGN + LOG2_NIBBLES_PER_DWORD) // 5 bits per bucket
#define MASK_BYTES_PER_BUCKET (BYTES_PER_BUCKET - 1) // 31
#define BYTES_PER_DWORD (NIBBLES_PER_DWORD * BYTES_PER_BUCKET) // 256 bytes per dword
#define LOG2_BYTES_PER_DWORD (LOG2_NIBBLES_PER_DWORD + LOG2_BYTES_PER_BUCKET) // 8 bits per dword
#define HIGHEST_NIBBLE_BIT (32 - NIBBLE_SIZE) // 28 (i.e 32 - 4)
#define HIGHEST_NIBBLE_MASK (NIBBLE_MASK << HIGHEST_NIBBLE_BIT) // 0xf0000000

#define ADDR2POS(x) ((x) >> LOG2_BYTES_PER_BUCKET)
#define ADDR2OFFS(x) (DWORD) ((((x) & MASK_BYTES_PER_BUCKET) >> LOG2_CODE_ALIGN) + 1)
#define POSOFF2ADDR(pos, of) (size_t) (((pos) << LOG2_BYTES_PER_BUCKET) + (((of) - 1) << LOG2_CODE_ALIGN))
#define HEAP2MAPSIZE(x) (((x) / (BYTES_PER_BUCKET * NIBBLES_PER_DWORD)) * CODE_ALIGN)
#define HEAP2MAPSIZE(x) (((x) / (BYTES_PER_DWORD) + 1) * sizeof(DWORD))
#define POS2SHIFTCOUNT(x) (DWORD) (HIGHEST_NIBBLE_BIT - (((x) & NIBBLES_PER_DWORD_MASK) << LOG2_NIBBLE_SIZE))
#define POS2MASK(x) (DWORD) ~(HIGHEST_NIBBLE_MASK >> (((x) & NIBBLES_PER_DWORD_MASK) << LOG2_NIBBLE_SIZE))

namespace NibbleMap
{
    // Tells whether a nibble-map DWORD holds an encoded relative pointer
    // instead of nibbles: its lowest nibble must be greater than 8.
    FORCEINLINE bool IsPointer(DWORD dword)
    {
        const DWORD lowNibble = dword & NIBBLE_MASK;
        return lowNibble > 8;
    }

    // Packs a relative pointer into a map DWORD.
    // Everything above the low nibble is kept as-is; the low nibble is
    // replaced by 9 + (bits 2..3 of the pointer), so the result always
    // satisfies IsPointer(). Bits 0..1 of the input are discarded, and the
    // result is truncated to the width of a DWORD.
    FORCEINLINE DWORD EncodePointer(size_t relativePointer)
    {
        const size_t upperBits = relativePointer & ~NIBBLE_MASK;
        const size_t tagNibble = ((relativePointer & NIBBLE_MASK) >> 2) + 9;
        return (DWORD) (upperBits + tagNibble);
    }

    // Inverse of EncodePointer(): rebuilds the relative pointer from a map
    // DWORD by undoing the +9 bias on the low nibble and shifting the two
    // recovered bits back to positions 2..3.
    FORCEINLINE size_t DecodePointer(DWORD dword)
    {
        // Intermediates stay DWORD-typed so the arithmetic is done at the
        // same width as the original single expression.
        const DWORD upperBits = dword & ~NIBBLE_MASK;
        const DWORD lowBits = ((dword & NIBBLE_MASK) - 9) << 2;
        return (size_t) (upperBits + lowBits);
    }
}


#endif // NIBBLEMAPMACROS_H_
16 changes: 15 additions & 1 deletion src/coreclr/utilcode/pedecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2581,6 +2581,8 @@ BOOL PEDecoder::ForceRelocForDLL(LPCWSTR lpFileName)
MethodSectionIterator::MethodSectionIterator(const void *code, SIZE_T codeSize,
const void *codeTable, SIZE_T codeTableSize)
{
using namespace NibbleMap;

//For DAC builds,we'll read the table one DWORD at a time. Note that m_code IS
//NOT a host pointer.
m_codeTableStart = PTR_DWORD(TADDR(codeTable));
Expand All @@ -2595,6 +2597,11 @@ MethodSectionIterator::MethodSectionIterator(const void *code, SIZE_T codeSize,
{
m_dword = *m_codeTable++;
m_index = 0;
while(m_codeTable < m_codeTableEnd && IsPointer(m_dword))
{
m_dword = *m_codeTable++;
m_code += BYTES_PER_DWORD;
}
}
else
{
Expand All @@ -2604,6 +2611,8 @@ MethodSectionIterator::MethodSectionIterator(const void *code, SIZE_T codeSize,

BOOL MethodSectionIterator::Next()
{
using namespace NibbleMap;

while (m_codeTable < m_codeTableEnd || m_index < (int)NIBBLES_PER_DWORD)
{
while (m_index++ < (int)NIBBLES_PER_DWORD)
Expand All @@ -2614,7 +2623,7 @@ BOOL MethodSectionIterator::Next()
if (nibble != 0)
{
// We have found a method start
m_current = m_code + ((nibble-1)*CODE_ALIGN);
m_current = m_code + ((nibble-1) << LOG2_CODE_ALIGN);
m_code += BYTES_PER_BUCKET;
return TRUE;
}
Expand All @@ -2626,6 +2635,11 @@ BOOL MethodSectionIterator::Next()
{
m_dword = *m_codeTable++;
m_index = 0;
while(m_codeTable < m_codeTableEnd && (IsPointer(m_dword) || m_dword == 0))
{
m_dword = *m_codeTable++;
m_code += BYTES_PER_DWORD;
}
}
}
return FALSE;
Expand Down
Loading

0 comments on commit 4087cc8

Please sign in to comment.