Skip to content

Commit

Permalink
Phase 1 of refactoring pgo data pipeline (#46638)
Browse files Browse the repository at this point in the history
Phase 1 of replacing the existing infrastructure around handling of pgo data with a more flexible schema-based approach.

The schema based approach allows the JIT to define the form of data needed for instrumentation.
- The schema associates four 32-bit integers with each data collection point (ILOffset, InstrumentationKind, Count, and Other)
  - Rich meaning is attached to InstrumentationKind and Count
    - InstrumentationKind defines the size and layout of individual instrumentation data items
    - Count allows a single schema item to be repeated
  - ILOffset and Other are not processed in any specific way by the infrastructure

Changes that are part of this phase
- PgoManager holds an arbitrary amount of pgo data instead of a slab
  - Aware of collectible assemblies
  - Matching with pgo data uses a hash of the IL body in addition to IL size information for greater matching accuracy
- JIT no longer uses block count apis, and instead uses schema based apis
  - JIT now explicitly defines the shape of data collected for both basic block and type probes
  - The rest of the system handles that without deep knowledge of what those formats are
- Text file format for pgo data updated
- Existing IBC infrastructure adjusted to speak in terms of schema concept
- Uncompressed and binary encoded implementation of Pgo schema handling
- Update SuperPMI to handle new apis

Future Changes for static Pgo
- Move Pgo type handle histogram processing into JIT
- Extract Pgo data from process using Event infrastructure
- Triggers for controlling Pgo data extraction
- Instrumented Pgo processing as part of dotnet-pgo tool
- Pgo data flow in crossgen2
  • Loading branch information
davidwrighton committed Jan 13, 2021
1 parent fd744a8 commit 6ded57b
Show file tree
Hide file tree
Showing 41 changed files with 2,757 additions and 956 deletions.
19 changes: 17 additions & 2 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/agnostic.h
Original file line number Diff line number Diff line change
Expand Up @@ -463,18 +463,33 @@ struct Agnostic_IsCompatibleDelegate
DWORDLONG delegateCls;
};

struct Agnostic_AllocMethodBlockCounts
// Recorded form of a single ICorJitInfo::PgoInstrumentationSchema entry.
// The rec* methods copy each schema field into this struct before storing it
// with AddBuffer, and the rep* methods copy the fields back out on replay.
// Field order and widths are part of the recorded data, so do not reorder.
struct Agnostic_PgoInstrumentationSchema
{
// Stored as a fixed 64-bit value; replay casts it back to size_t.
// NOTE(review): appears to be the entry's byte offset into the
// instrumentation data buffer (it is used to size that buffer) - confirm.
DWORDLONG Offset;
// Per the commit description, defines the size and layout of the
// individual instrumentation data items.
ICorJitInfo::PgoInstrumentationKind InstrumentationKind;
// Per the commit description, not interpreted by the infrastructure.
int32_t ILOffset;
// Per the commit description, allows a single schema item to be repeated.
int32_t Count;
// Per the commit description, not interpreted by the infrastructure.
int32_t Other;
};

struct Agnostic_AllocPgoInstrumentationBySchema
{
DWORDLONG address;
DWORD count;
DWORD schema_index;
DWORD schemaCount;
DWORD result;
};

struct Agnostic_GetMethodBlockCounts
struct Agnostic_GetPgoInstrumentationResults
{
DWORD count;
DWORD pBlockCounts_index;
DWORD numRuns;
DWORD schemaCount;
DWORD dataByteCount;
DWORD schema_index;
DWORD data_index;
DWORD result;
};

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/lwmlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
#define DENSELWM(map, value) LWM(map, this_is_an_error, value)
#endif

LWM(AllocMethodBlockCounts, DWORD, Agnostic_AllocMethodBlockCounts)
LWM(AllocPgoInstrumentationBySchema, DWORDLONG, Agnostic_AllocPgoInstrumentationBySchema)
LWM(GetPgoInstrumentationResults, DWORDLONG, Agnostic_GetPgoInstrumentationResults)
LWM(AppendClassName, Agnostic_AppendClassName, DWORD)
LWM(AreTypesEquivalent, DLDL, DWORD)
LWM(AsCorInfoType, DWORDLONG, DWORD)
Expand Down Expand Up @@ -54,7 +55,6 @@ LWM(GetArgNext, DWORDLONG, DWORDLONG)
LWM(GetArgType, Agnostic_GetArgType_Key, Agnostic_GetArgType_Value)
LWM(GetArrayInitializationData, DLD, DWORDLONG)
LWM(GetArrayRank, DWORDLONG, DWORD)
LWM(GetMethodBlockCounts, DWORDLONG, Agnostic_GetMethodBlockCounts)
LWM(GetBoundaries, DWORDLONG, Agnostic_GetBoundaries)
LWM(GetBoxHelper, DWORDLONG, DWORD)
LWM(GetBuiltinClass, DWORD, DWORDLONG)
Expand Down
159 changes: 112 additions & 47 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ void MethodContext::Destroy()
#include "lwmlist.h"

delete cr;
FreeTempAllocations();
}

#define sparseAddLen(target) \
Expand Down Expand Up @@ -288,6 +289,8 @@ void MethodContext::MethodInitHelper(unsigned char* buff2, unsigned int totalLen
unsigned char canary = 0xff;
unsigned char* buff3 = nullptr;

FreeTempAllocations();

while (buffIndex < totalLen)
{
mcPackets packetType = (mcPackets)buff2[buffIndex++];
Expand Down Expand Up @@ -5069,36 +5072,65 @@ DWORD MethodContext::repGetFieldThreadLocalStoreID(CORINFO_FIELD_HANDLE field, v
}


void MethodContext::recAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts, HRESULT result)
void MethodContext::recAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData, HRESULT result)
{
if (AllocMethodBlockCounts == nullptr)
AllocMethodBlockCounts = new LightWeightMap<DWORD, Agnostic_AllocMethodBlockCounts>();
if (AllocPgoInstrumentationBySchema == nullptr)
AllocPgoInstrumentationBySchema = new LightWeightMap<DWORDLONG, Agnostic_AllocPgoInstrumentationBySchema>();

Agnostic_AllocMethodBlockCounts value;
Agnostic_AllocPgoInstrumentationBySchema value;

value.address = CastPointer(*pBlockCounts);
value.count = (DWORD)count;
value.schemaCount = countSchemaItems;
value.address = CastPointer(*pInstrumentationData);
Agnostic_PgoInstrumentationSchema* agnosticSchema = (Agnostic_PgoInstrumentationSchema*)malloc(sizeof(Agnostic_PgoInstrumentationSchema) * countSchemaItems);
for (UINT32 i = 0; i < countSchemaItems; i++)
{
agnosticSchema[i].Offset = pSchema[i].Offset;
agnosticSchema[i].InstrumentationKind = pSchema[i].InstrumentationKind;
agnosticSchema[i].ILOffset = pSchema[i].ILOffset;
agnosticSchema[i].Count = pSchema[i].Count;
agnosticSchema[i].Other = pSchema[i].Other;
}
value.schema_index = AllocPgoInstrumentationBySchema->AddBuffer((unsigned char*)agnosticSchema, sizeof(Agnostic_PgoInstrumentationSchema) * countSchemaItems);
free(agnosticSchema);
value.result = (DWORD)result;

AllocMethodBlockCounts->Add((DWORD)0, value);
AllocPgoInstrumentationBySchema->Add(CastHandle(ftnHnd), value);
}
void MethodContext::dmpAllocMethodBlockCounts(DWORD key, const Agnostic_AllocMethodBlockCounts& value)

void MethodContext::dmpAllocPgoInstrumentationBySchema(DWORDLONG key, const Agnostic_AllocPgoInstrumentationBySchema& value)
{
printf("AllocMethodBlockCounts key %u, value addr-%016llX cnt-%u res-%08X", key, value.address, value.count, value.result);
printf("AllocPgoInstrumentationBySchema key ftn-%016llX, value addr-%016llX cnt-%u res-%08X", key, value.address, value.schemaCount, value.result);
Agnostic_PgoInstrumentationSchema* pBuf =
(Agnostic_PgoInstrumentationSchema*)AllocPgoInstrumentationBySchema->GetBuffer(value.schema_index);

for (UINT32 i = 0; i < value.schemaCount; i++)
{
printf(" Offset %016llX ILOffset %u Kind %u Count %u Other %u\n", pBuf[i].Offset, pBuf[i].ILOffset, pBuf[i].InstrumentationKind, pBuf[i].Count, pBuf[i].Other);
}
}
HRESULT MethodContext::repAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts)

DWORD MethodContext::repAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData)
{
Agnostic_AllocMethodBlockCounts value;
value = AllocMethodBlockCounts->Get((DWORD)0);
Agnostic_AllocPgoInstrumentationBySchema value;
value = AllocPgoInstrumentationBySchema->Get(CastHandle(ftnHnd));

if (count != value.count)
if (countSchemaItems != value.schemaCount)
{
LogWarning("AllocMethodBlockCount mismatch: record %d, replay %d", value.count, count);
LogWarning("AllocPgoInstrumentationBySchema mismatch: record %d, replay %d", value.schemaCount, countSchemaItems);
}

HRESULT result = (HRESULT)value.result;

// Allocate a scratch buffer, linked to method context via AllocMethodBlockCounts, so it gets
Agnostic_PgoInstrumentationSchema* pAgnosticSchema = (Agnostic_PgoInstrumentationSchema*)AllocPgoInstrumentationBySchema->GetBuffer(value.schema_index);
size_t maxOffset = 0;
for (UINT32 iSchema = 0; iSchema < countSchemaItems && iSchema < value.schemaCount; iSchema++)
{
pSchema[iSchema].Offset = (size_t)pAgnosticSchema[iSchema].Offset;
if (pSchema[iSchema].Offset > maxOffset)
maxOffset = pSchema[iSchema].Offset;
}

// Allocate a scratch buffer, linked to method context via AllocPgoInstrumentationBySchema, so it gets
// cleaned up when the method context does.
//
// We won't bother recording this via AddBuffer because currently SPMI will never look at it.
Expand All @@ -5107,54 +5139,87 @@ HRESULT MethodContext::repAllocMethodBlockCounts(ULONG count, ICorJitInfo::Block
// Todo, perhaps: record the buffer as a compile result instead, and defer copying until
// jit completion so we can snapshot the offsets the jit writes.
//
*pBlockCounts = (ICorJitInfo::BlockCounts*)AllocMethodBlockCounts->CreateBuffer(count * sizeof(ICorJitInfo::BlockCounts));
cr->recAddressMap((void*)value.address, (void*)*pBlockCounts, count * (sizeof(ICorJitInfo::BlockCounts)));
// Add 16 bytes to represent writeable space
size_t bufSize = maxOffset + 16;
*pInstrumentationData = (BYTE*)AllocJitTempBuffer((unsigned)bufSize);
cr->recAddressMap((void*)value.address, (void*)*pInstrumentationData, (unsigned)bufSize);
return result;
}

void MethodContext::recGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
UINT32 * pCount,
ICorJitInfo::BlockCounts** pBlockCounts,
UINT32 * pNumRuns,
HRESULT result)
void MethodContext::recGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd,
ICorJitInfo::PgoInstrumentationSchema** pSchema,
UINT32* pCountSchemaItems,
BYTE** pInstrumentationData,
HRESULT result)
{
if (GetMethodBlockCounts == nullptr)
GetMethodBlockCounts = new LightWeightMap<DWORDLONG, Agnostic_GetMethodBlockCounts>();
if (GetPgoInstrumentationResults == nullptr)
GetPgoInstrumentationResults = new LightWeightMap<DWORDLONG, Agnostic_GetPgoInstrumentationResults>();

Agnostic_GetPgoInstrumentationResults value;

Agnostic_GetMethodBlockCounts value;
value.schemaCount = *pCountSchemaItems;

value.count = (DWORD)*pCount;
value.pBlockCounts_index =
GetMethodBlockCounts->AddBuffer((unsigned char*)*pBlockCounts, sizeof(ICorJitInfo::BlockCounts) * (*pCount));
value.numRuns = (DWORD)*pNumRuns;
Agnostic_PgoInstrumentationSchema* agnosticSchema = (Agnostic_PgoInstrumentationSchema*)malloc(sizeof(Agnostic_PgoInstrumentationSchema) * (*pCountSchemaItems));
size_t maxOffset = 0;
for (UINT32 i = 0; i < (*pCountSchemaItems); i++)
{
if ((*pSchema)[i].Offset > maxOffset)
maxOffset = (*pSchema)[i].Offset;
agnosticSchema[i].Offset = (*pSchema)[i].Offset;
agnosticSchema[i].InstrumentationKind = (*pSchema)[i].InstrumentationKind;
agnosticSchema[i].ILOffset = (*pSchema)[i].ILOffset;
agnosticSchema[i].Count = (*pSchema)[i].Count;
agnosticSchema[i].Other = (*pSchema)[i].Other;
}
value.schema_index = GetPgoInstrumentationResults->AddBuffer((unsigned char*)agnosticSchema, sizeof(Agnostic_PgoInstrumentationSchema) * (*pCountSchemaItems));
free(agnosticSchema);

// This isn't strictly accurate, but I think it'll do
size_t bufSize = maxOffset + 16;

value.data_index = GetPgoInstrumentationResults->AddBuffer((unsigned char*)*pInstrumentationData, (unsigned)bufSize);
value.dataByteCount = (unsigned)bufSize;
value.result = (DWORD)result;

GetMethodBlockCounts->Add(CastHandle(ftnHnd), value);
GetPgoInstrumentationResults->Add(CastHandle(ftnHnd), value);
}
void MethodContext::dmpGetMethodBlockCounts(DWORDLONG key, const Agnostic_GetMethodBlockCounts& value)
void MethodContext::dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnostic_GetPgoInstrumentationResults& value)
{
printf("GetMethodBlockCounts key ftn-%016llX, value cnt-%u profileBuf-", key, value.count);
ICorJitInfo::BlockCounts* pBuf =
(ICorJitInfo::BlockCounts*)GetMethodBlockCounts->GetBuffer(value.pBlockCounts_index);
for (DWORD i = 0; i < value.count; i++, pBuf++)
printf("GetMethodBlockCounts key ftn-%016llX, value schemaCnt-%u profileBufSize-%u", key, value.schemaCount, value.dataByteCount);
Agnostic_PgoInstrumentationSchema* pBuf =
(Agnostic_PgoInstrumentationSchema*)GetPgoInstrumentationResults->GetBuffer(value.schema_index);

for (UINT32 i = 0; i < value.schemaCount; i++)
{
printf("{il-%u,cnt-%u}", pBuf->ILOffset, pBuf->ExecutionCount);
printf(" Offset %016llX ILOffset %u Kind %u Count %u Other %u\n", pBuf[i].Offset, pBuf[i].ILOffset, pBuf[i].InstrumentationKind, pBuf[i].Count, pBuf[i].Other);
}
GetMethodBlockCounts->Unlock();
printf(" numRuns-%u result-%u", value.numRuns, value.result);

// TODO, dump actual count data
}
HRESULT MethodContext::repGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
UINT32 * pCount,
ICorJitInfo::BlockCounts** pBlockCounts,
UINT32 * pNumRuns)
DWORD MethodContext::repGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd,
ICorJitInfo::PgoInstrumentationSchema** pSchema,
UINT32* pCountSchemaItems,
BYTE** pInstrumentationData)
{
Agnostic_GetMethodBlockCounts tempValue;
Agnostic_GetPgoInstrumentationResults tempValue;

tempValue = GetMethodBlockCounts->Get(CastHandle(ftnHnd));
tempValue = GetPgoInstrumentationResults->Get(CastHandle(ftnHnd));

*pCountSchemaItems = (UINT32)tempValue.schemaCount;
*pInstrumentationData = (BYTE*)GetPgoInstrumentationResults->GetBuffer(tempValue.data_index);

*pSchema = (ICorJitInfo::PgoInstrumentationSchema*)AllocJitTempBuffer(tempValue.schemaCount * sizeof(ICorJitInfo::PgoInstrumentationSchema));

Agnostic_PgoInstrumentationSchema* pAgnosticSchema = (Agnostic_PgoInstrumentationSchema*)GetPgoInstrumentationResults->GetBuffer(tempValue.schema_index);
for (UINT32 iSchema = 0; iSchema < tempValue.schemaCount; iSchema++)
{
(*pSchema)[iSchema].Offset = (size_t)pAgnosticSchema[iSchema].Offset;
(*pSchema)[iSchema].ILOffset = pAgnosticSchema[iSchema].ILOffset;
(*pSchema)[iSchema].InstrumentationKind = pAgnosticSchema[iSchema].InstrumentationKind;
(*pSchema)[iSchema].Count = pAgnosticSchema[iSchema].Count;
(*pSchema)[iSchema].Other = pAgnosticSchema[iSchema].Other;
}

*pCount = (UINT32)tempValue.count;
*pBlockCounts = (ICorJitInfo::BlockCounts*)GetMethodBlockCounts->GetBuffer(tempValue.pBlockCounts_index);
*pNumRuns = (UINT32)tempValue.numRuns;
HRESULT result = (HRESULT)tempValue.result;
return result;
}
Expand Down
52 changes: 36 additions & 16 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.h
Original file line number Diff line number Diff line change
Expand Up @@ -635,20 +635,13 @@ class MethodContext
void dmpGetFieldThreadLocalStoreID(DWORDLONG key, DLD value);
DWORD repGetFieldThreadLocalStoreID(CORINFO_FIELD_HANDLE field, void** ppIndirection);

void recAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts, HRESULT result);
void dmpAllocMethodBlockCounts(DWORD key, const Agnostic_AllocMethodBlockCounts& value);
HRESULT repAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts);

void recGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
UINT32 * pCount,
ICorJitInfo::BlockCounts** pBlockCounts,
UINT32 * pNumRuns,
HRESULT result);
void dmpGetMethodBlockCounts(DWORDLONG key, const Agnostic_GetMethodBlockCounts& value);
HRESULT repGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
UINT32 * pCount,
ICorJitInfo::BlockCounts** pBlockCounts,
UINT32 * pNumRuns);
void recAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData, HRESULT result);
void dmpAllocPgoInstrumentationBySchema(DWORDLONG key, const Agnostic_AllocPgoInstrumentationBySchema& value);
DWORD repAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData);

void recGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema** pSchema, UINT32* pCountSchemaItems, BYTE** pInstrumentationData, HRESULT result);
void dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnostic_GetPgoInstrumentationResults& value);
DWORD repGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema** pSchema, UINT32* pCountSchemaItems, BYTE** pInstrumentationData);

void recGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, CORINFO_CLASS_HANDLE classHnd, UINT32* pLikelihood, UINT32* pNumberOfClasses);
void dmpGetLikelyClass(const Agnostic_GetLikelyClass& key, const Agnostic_GetLikelyClassResult& value);
Expand Down Expand Up @@ -823,14 +816,39 @@ class MethodContext

// MD5 hasher
static Hash m_hash;

// Scheme for jit time temporary allocations.
//
// Every buffer handed out by AllocJitTempBuffer is preceded in memory by a
// DeletionNode header. The headers form a singly linked list rooted at
// nodesToDelete, so FreeTempAllocations can release every outstanding buffer
// in a single pass without the callers tracking them individually.
struct DeletionNode
{
    DeletionNode* pNext; // next node in the list, or nullptr at the tail
};
DeletionNode* nodesToDelete = nullptr; // list head; the most recent allocation comes first

// Allocates `size` bytes of scratch memory that is released by the next call
// to FreeTempAllocations.
//
// Returns a pointer to the usable bytes (immediately after the hidden
// DeletionNode header), or nullptr if the underlying malloc fails.
void* AllocJitTempBuffer(size_t size)
{
    DeletionNode* pDeletionNode = (DeletionNode*)malloc(sizeof(DeletionNode) + size);
    if (pDeletionNode == nullptr)
    {
        return nullptr;
    }

    // Push the new node onto the front of the list. The link must be written
    // into the node itself; assigning to the local pointer instead would drop
    // the fresh allocation and hand back memory that was never allocated.
    pDeletionNode->pNext = nodesToDelete;
    nodesToDelete        = pDeletionNode;

    // The caller's buffer starts right after the header.
    return pDeletionNode + 1;
}

// Frees every buffer previously returned by AllocJitTempBuffer and leaves the
// list empty. Safe to call when nothing has been allocated.
void FreeTempAllocations()
{
    while (nodesToDelete != nullptr)
    {
        // Read the link before freeing the node that holds it.
        DeletionNode* next = nodesToDelete->pNext;
        free(nodesToDelete);
        nodesToDelete = next;
    }
}
};

// ********************* Please keep this up-to-date to ease adding more ***************
// Highest packet number: 185
// Highest packet number: 187
// *************************************************************************************
enum mcPackets
{
Packet_AllocMethodBlockCounts = 131,
Packet_AllocMethodBlockCounts = 131, // retired 1/4/2021
Packet_AppendClassName = 149, // Added 8/6/2014 - needed for SIMD
Packet_AreTypesEquivalent = 1,
Packet_AsCorInfoType = 2,
Expand Down Expand Up @@ -988,6 +1006,8 @@ enum mcPackets
Packet_SatisfiesMethodConstraints = 111,
Packet_ShouldEnforceCallvirtRestriction = 112, // Retired 2/18/2020
Packet_SigInstHandleMap = 184,
Packet_AllocPgoInstrumentationBySchema = 186, // Added 1/4/2021
Packet_GetPgoInstrumentationResults = 187, // Added 1/4/2021

PacketCR_AddressMap = 113,
PacketCR_AllocGCInfo = 114,
Expand Down
Loading

0 comments on commit 6ded57b

Please sign in to comment.