diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/agnostic.h b/src/coreclr/ToolBox/superpmi/superpmi-shared/agnostic.h index 29fab69dadc86..b67c4fa7980ad 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/agnostic.h @@ -463,18 +463,33 @@ struct Agnostic_IsCompatibleDelegate DWORDLONG delegateCls; }; -struct Agnostic_AllocMethodBlockCounts +struct Agnostic_PgoInstrumentationSchema +{ + DWORDLONG Offset; + ICorJitInfo::PgoInstrumentationKind InstrumentationKind; + int32_t ILOffset; + int32_t Count; + int32_t Other; +}; + +struct Agnostic_AllocPgoInstrumentationBySchema { DWORDLONG address; DWORD count; + DWORD schema_index; + DWORD schemaCount; DWORD result; }; -struct Agnostic_GetMethodBlockCounts +struct Agnostic_GetPgoInstrumentationResults { DWORD count; DWORD pBlockCounts_index; DWORD numRuns; + DWORD schemaCount; + DWORD dataByteCount; + DWORD schema_index; + DWORD data_index; DWORD result; }; diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/ToolBox/superpmi/superpmi-shared/lwmlist.h index 1597cf46373a8..18948eafc4ce4 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/lwmlist.h +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/lwmlist.h @@ -18,7 +18,8 @@ #define DENSELWM(map, value) LWM(map, this_is_an_error, value) #endif -LWM(AllocMethodBlockCounts, DWORD, Agnostic_AllocMethodBlockCounts) +LWM(AllocPgoInstrumentationBySchema, DWORDLONG, Agnostic_AllocPgoInstrumentationBySchema) +LWM(GetPgoInstrumentationResults, DWORDLONG, Agnostic_GetPgoInstrumentationResults) LWM(AppendClassName, Agnostic_AppendClassName, DWORD) LWM(AreTypesEquivalent, DLDL, DWORD) LWM(AsCorInfoType, DWORDLONG, DWORD) @@ -54,7 +55,6 @@ LWM(GetArgNext, DWORDLONG, DWORDLONG) LWM(GetArgType, Agnostic_GetArgType_Key, Agnostic_GetArgType_Value) LWM(GetArrayInitializationData, DLD, DWORDLONG) LWM(GetArrayRank, DWORDLONG, DWORD) -LWM(GetMethodBlockCounts, DWORDLONG, 
Agnostic_GetMethodBlockCounts) LWM(GetBoundaries, DWORDLONG, Agnostic_GetBoundaries) LWM(GetBoxHelper, DWORDLONG, DWORD) LWM(GetBuiltinClass, DWORD, DWORDLONG) diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp index 028a7624281b4..9ed4a7b507482 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp @@ -64,6 +64,7 @@ void MethodContext::Destroy() #include "lwmlist.h" delete cr; + FreeTempAllocations(); } #define sparseAddLen(target) \ @@ -288,6 +289,8 @@ void MethodContext::MethodInitHelper(unsigned char* buff2, unsigned int totalLen unsigned char canary = 0xff; unsigned char* buff3 = nullptr; + FreeTempAllocations(); + while (buffIndex < totalLen) { mcPackets packetType = (mcPackets)buff2[buffIndex++]; @@ -5069,36 +5072,65 @@ DWORD MethodContext::repGetFieldThreadLocalStoreID(CORINFO_FIELD_HANDLE field, v } -void MethodContext::recAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts, HRESULT result) +void MethodContext::recAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData, HRESULT result) { - if (AllocMethodBlockCounts == nullptr) - AllocMethodBlockCounts = new LightWeightMap<DWORD, Agnostic_AllocMethodBlockCounts>(); + if (AllocPgoInstrumentationBySchema == nullptr) + AllocPgoInstrumentationBySchema = new LightWeightMap<DWORDLONG, Agnostic_AllocPgoInstrumentationBySchema>(); - Agnostic_AllocMethodBlockCounts value; + Agnostic_AllocPgoInstrumentationBySchema value = {}; // zero-init: 'count' is never assigned in this function and must not be recorded as garbage - value.address = CastPointer(*pBlockCounts); - value.count = (DWORD)count; + value.schemaCount = countSchemaItems; + value.address = CastPointer(*pInstrumentationData); + Agnostic_PgoInstrumentationSchema* agnosticSchema = (Agnostic_PgoInstrumentationSchema*)malloc(sizeof(Agnostic_PgoInstrumentationSchema) * countSchemaItems); + for (UINT32 i = 0; i < countSchemaItems; i++) + { + 
agnosticSchema[i].Offset = pSchema[i].Offset; + agnosticSchema[i].InstrumentationKind = pSchema[i].InstrumentationKind; + agnosticSchema[i].ILOffset = pSchema[i].ILOffset; + agnosticSchema[i].Count = pSchema[i].Count; + agnosticSchema[i].Other = pSchema[i].Other; + } + value.schema_index = AllocPgoInstrumentationBySchema->AddBuffer((unsigned char*)agnosticSchema, sizeof(Agnostic_PgoInstrumentationSchema) * countSchemaItems); + free(agnosticSchema); value.result = (DWORD)result; - AllocMethodBlockCounts->Add((DWORD)0, value); + AllocPgoInstrumentationBySchema->Add(CastHandle(ftnHnd), value); } -void MethodContext::dmpAllocMethodBlockCounts(DWORD key, const Agnostic_AllocMethodBlockCounts& value) + +void MethodContext::dmpAllocPgoInstrumentationBySchema(DWORDLONG key, const Agnostic_AllocPgoInstrumentationBySchema& value) { - printf("AllocMethodBlockCounts key %u, value addr-%016llX cnt-%u res-%08X", key, value.address, value.count, value.result); + printf("AllocPgoInstrumentationBySchema key ftn-%016llX, value addr-%016llX cnt-%u res-%08X", key, value.address, value.schemaCount, value.result); + Agnostic_PgoInstrumentationSchema* pBuf = + (Agnostic_PgoInstrumentationSchema*)AllocPgoInstrumentationBySchema->GetBuffer(value.schema_index); + + for (UINT32 i = 0; i < value.schemaCount; i++) + { + printf(" Offset %016llX ILOffset %u Kind %u Count %u Other %u\n", pBuf[i].Offset, pBuf[i].ILOffset, pBuf[i].InstrumentationKind, pBuf[i].Count, pBuf[i].Other); + } } -HRESULT MethodContext::repAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts) + +DWORD MethodContext::repAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData) { - Agnostic_AllocMethodBlockCounts value; - value = AllocMethodBlockCounts->Get((DWORD)0); + Agnostic_AllocPgoInstrumentationBySchema value; + value = AllocPgoInstrumentationBySchema->Get(CastHandle(ftnHnd)); - if (count != 
value.count) + if (countSchemaItems != value.schemaCount) { - LogWarning("AllocMethodBlockCount mismatch: record %d, replay %d", value.count, count); + LogWarning("AllocPgoInstrumentationBySchema mismatch: record %d, replay %d", value.schemaCount, countSchemaItems); } HRESULT result = (HRESULT)value.result; - // Allocate a scratch buffer, linked to method context via AllocMethodBlockCounts, so it gets + Agnostic_PgoInstrumentationSchema* pAgnosticSchema = (Agnostic_PgoInstrumentationSchema*)AllocPgoInstrumentationBySchema->GetBuffer(value.schema_index); + size_t maxOffset = 0; + for (UINT32 iSchema = 0; iSchema < countSchemaItems && iSchema < value.schemaCount; iSchema++) + { + pSchema[iSchema].Offset = (size_t)pAgnosticSchema[iSchema].Offset; + if (pSchema[iSchema].Offset > maxOffset) + maxOffset = pSchema[iSchema].Offset; + } + + // Allocate a scratch buffer, linked to method context via AllocPgoInstrumentationBySchema, so it gets // cleaned up when the method context does. // // We won't bother recording this via AddBuffer because currently SPMI will never look at it. @@ -5107,54 +5139,87 @@ HRESULT MethodContext::repAllocMethodBlockCounts(ULONG count, ICorJitInfo::Block // Todo, perhaps: record the buffer as a compile result instead, and defer copying until // jit completion so we can snapshot the offsets the jit writes. 
// - *pBlockCounts = (ICorJitInfo::BlockCounts*)AllocMethodBlockCounts->CreateBuffer(count * sizeof(ICorJitInfo::BlockCounts)); - cr->recAddressMap((void*)value.address, (void*)*pBlockCounts, count * (sizeof(ICorJitInfo::BlockCounts))); + // Add 16 bytes to represent writeable space + size_t bufSize = maxOffset + 16; + *pInstrumentationData = (BYTE*)AllocJitTempBuffer((unsigned)bufSize); + cr->recAddressMap((void*)value.address, (void*)*pInstrumentationData, (unsigned)bufSize); return result; } -void MethodContext::recGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, - ICorJitInfo::BlockCounts** pBlockCounts, - UINT32 * pNumRuns, - HRESULT result) +void MethodContext::recGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, + ICorJitInfo::PgoInstrumentationSchema** pSchema, + UINT32* pCountSchemaItems, + BYTE** pInstrumentationData, + HRESULT result) { - if (GetMethodBlockCounts == nullptr) - GetMethodBlockCounts = new LightWeightMap<DWORDLONG, Agnostic_GetMethodBlockCounts>(); + if (GetPgoInstrumentationResults == nullptr) + GetPgoInstrumentationResults = new LightWeightMap<DWORDLONG, Agnostic_GetPgoInstrumentationResults>(); + + Agnostic_GetPgoInstrumentationResults value = {}; // zero-init: count, pBlockCounts_index and numRuns are never assigned in this function - Agnostic_GetMethodBlockCounts value; + value.schemaCount = *pCountSchemaItems; - value.count = (DWORD)*pCount; - value.pBlockCounts_index = - GetMethodBlockCounts->AddBuffer((unsigned char*)*pBlockCounts, sizeof(ICorJitInfo::BlockCounts) * (*pCount)); - value.numRuns = (DWORD)*pNumRuns; + Agnostic_PgoInstrumentationSchema* agnosticSchema = (Agnostic_PgoInstrumentationSchema*)malloc(sizeof(Agnostic_PgoInstrumentationSchema) * (*pCountSchemaItems)); + size_t maxOffset = 0; + for (UINT32 i = 0; i < (*pCountSchemaItems); i++) + { + if ((*pSchema)[i].Offset > maxOffset) + maxOffset = (*pSchema)[i].Offset; + agnosticSchema[i].Offset = (*pSchema)[i].Offset; + agnosticSchema[i].InstrumentationKind = (*pSchema)[i].InstrumentationKind; + agnosticSchema[i].ILOffset = (*pSchema)[i].ILOffset; + agnosticSchema[i].Count = (*pSchema)[i].Count; + agnosticSchema[i].Other = 
(*pSchema)[i].Other; + } + value.schema_index = GetPgoInstrumentationResults->AddBuffer((unsigned char*)agnosticSchema, sizeof(Agnostic_PgoInstrumentationSchema) * (*pCountSchemaItems)); + free(agnosticSchema); + + // This isn't strictly accurate, but I think it'll do + size_t bufSize = maxOffset + 16; + + value.data_index = GetPgoInstrumentationResults->AddBuffer((unsigned char*)*pInstrumentationData, (unsigned)bufSize); + value.dataByteCount = (unsigned)bufSize; value.result = (DWORD)result; - GetMethodBlockCounts->Add(CastHandle(ftnHnd), value); + GetPgoInstrumentationResults->Add(CastHandle(ftnHnd), value); } -void MethodContext::dmpGetMethodBlockCounts(DWORDLONG key, const Agnostic_GetMethodBlockCounts& value) +void MethodContext::dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnostic_GetPgoInstrumentationResults& value) { - printf("GetMethodBlockCounts key ftn-%016llX, value cnt-%u profileBuf-", key, value.count); - ICorJitInfo::BlockCounts* pBuf = - (ICorJitInfo::BlockCounts*)GetMethodBlockCounts->GetBuffer(value.pBlockCounts_index); - for (DWORD i = 0; i < value.count; i++, pBuf++) + printf("GetPgoInstrumentationResults key ftn-%016llX, value schemaCnt-%u profileBufSize-%u", key, value.schemaCount, value.dataByteCount); + Agnostic_PgoInstrumentationSchema* pBuf = + (Agnostic_PgoInstrumentationSchema*)GetPgoInstrumentationResults->GetBuffer(value.schema_index); + + for (UINT32 i = 0; i < value.schemaCount; i++) { - printf("{il-%u,cnt-%u}", pBuf->ILOffset, pBuf->ExecutionCount); + printf(" Offset %016llX ILOffset %u Kind %u Count %u Other %u\n", pBuf[i].Offset, pBuf[i].ILOffset, pBuf[i].InstrumentationKind, pBuf[i].Count, pBuf[i].Other); } - GetMethodBlockCounts->Unlock(); - printf(" numRuns-%u result-%u", value.numRuns, value.result); + + // TODO, dump actual count data } -HRESULT MethodContext::repGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, - ICorJitInfo::BlockCounts** pBlockCounts, - UINT32 * pNumRuns) +DWORD 
MethodContext::repGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, + ICorJitInfo::PgoInstrumentationSchema** pSchema, + UINT32* pCountSchemaItems, + BYTE** pInstrumentationData) { - Agnostic_GetMethodBlockCounts tempValue; + Agnostic_GetPgoInstrumentationResults tempValue; - tempValue = GetMethodBlockCounts->Get(CastHandle(ftnHnd)); + tempValue = GetPgoInstrumentationResults->Get(CastHandle(ftnHnd)); + + *pCountSchemaItems = (UINT32)tempValue.schemaCount; + *pInstrumentationData = (BYTE*)GetPgoInstrumentationResults->GetBuffer(tempValue.data_index); + + *pSchema = (ICorJitInfo::PgoInstrumentationSchema*)AllocJitTempBuffer(tempValue.schemaCount * sizeof(ICorJitInfo::PgoInstrumentationSchema)); + + Agnostic_PgoInstrumentationSchema* pAgnosticSchema = (Agnostic_PgoInstrumentationSchema*)GetPgoInstrumentationResults->GetBuffer(tempValue.schema_index); + for (UINT32 iSchema = 0; iSchema < tempValue.schemaCount; iSchema++) + { + (*pSchema)[iSchema].Offset = (size_t)pAgnosticSchema[iSchema].Offset; + (*pSchema)[iSchema].ILOffset = pAgnosticSchema[iSchema].ILOffset; + (*pSchema)[iSchema].InstrumentationKind = pAgnosticSchema[iSchema].InstrumentationKind; + (*pSchema)[iSchema].Count = pAgnosticSchema[iSchema].Count; + (*pSchema)[iSchema].Other = pAgnosticSchema[iSchema].Other; + } - *pCount = (UINT32)tempValue.count; - *pBlockCounts = (ICorJitInfo::BlockCounts*)GetMethodBlockCounts->GetBuffer(tempValue.pBlockCounts_index); - *pNumRuns = (UINT32)tempValue.numRuns; HRESULT result = (HRESULT)tempValue.result; return result; } diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.h index ecdff41150e3c..30f5533d498fb 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.h @@ -635,20 +635,13 @@ class MethodContext void dmpGetFieldThreadLocalStoreID(DWORDLONG key, DLD value); DWORD 
repGetFieldThreadLocalStoreID(CORINFO_FIELD_HANDLE field, void** ppIndirection); - void recAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts, HRESULT result); - void dmpAllocMethodBlockCounts(DWORD key, const Agnostic_AllocMethodBlockCounts& value); - HRESULT repAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts); - - void recGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, - ICorJitInfo::BlockCounts** pBlockCounts, - UINT32 * pNumRuns, - HRESULT result); - void dmpGetMethodBlockCounts(DWORDLONG key, const Agnostic_GetMethodBlockCounts& value); - HRESULT repGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, - ICorJitInfo::BlockCounts** pBlockCounts, - UINT32 * pNumRuns); + void recAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData, HRESULT result); + void dmpAllocPgoInstrumentationBySchema(DWORDLONG key, const Agnostic_AllocPgoInstrumentationBySchema& value); + DWORD repAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData); + + void recGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema** pSchema, UINT32* pCountSchemaItems, BYTE** pInstrumentationData, HRESULT result); + void dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnostic_GetPgoInstrumentationResults& value); + DWORD repGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema** pSchema, UINT32* pCountSchemaItems, BYTE** pInstrumentationData); void recGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, CORINFO_CLASS_HANDLE classHnd, UINT32* pLikelihood, UINT32* pNumberOfClasses); void dmpGetLikelyClass(const Agnostic_GetLikelyClass& key, const 
Agnostic_GetLikelyClassResult& value); @@ -823,14 +816,39 @@ class MethodContext // MD5 hasher static Hash m_hash; + + // Scheme for jit time temporary allocations + struct DeletionNode + { + DeletionNode* pNext; + }; + DeletionNode *nodesToDelete = nullptr; + + void* AllocJitTempBuffer(size_t size) + { + DeletionNode *pDeletionNode = (DeletionNode *)malloc(sizeof(DeletionNode) + size); + pDeletionNode->pNext = this->nodesToDelete; // link the new node in front of the current list head (was overwriting the malloc'ed pointer) + this->nodesToDelete = pDeletionNode; + return pDeletionNode + 1; + } + + void FreeTempAllocations() + { + while (nodesToDelete != nullptr) + { + DeletionNode *next = nodesToDelete->pNext; + free(nodesToDelete); + nodesToDelete = next; + } + } }; // ********************* Please keep this up-to-date to ease adding more *************** -// Highest packet number: 185 +// Highest packet number: 187 // ************************************************************************************* enum mcPackets { - Packet_AllocMethodBlockCounts = 131, + Packet_AllocMethodBlockCounts = 131, // retired 1/4/2021 Packet_AppendClassName = 149, // Added 8/6/2014 - needed for SIMD Packet_AreTypesEquivalent = 1, Packet_AsCorInfoType = 2, @@ -988,6 +1006,8 @@ enum mcPackets Packet_SatisfiesMethodConstraints = 111, Packet_ShouldEnforceCallvirtRestriction = 112, // Retired 2/18/2020 Packet_SigInstHandleMap = 184, + Packet_AllocPgoInstrumentationBySchema = 186, // Added 1/4/2021 + Packet_GetPgoInstrumentationResults = 187, // Added 1/4/2021 PacketCR_AddressMap = 113, PacketCR_AllocGCInfo = 114, diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp index ed2cfeb358813..6f07edb6a9dc4 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp @@ -2025,25 +2025,27 @@ void interceptor_ICJI::reportFatalError(CorJitResult result) // allocate a basic block profile buffer where 
execution counts will be stored // for jitted basic blocks. -HRESULT interceptor_ICJI::allocMethodBlockCounts(UINT32 count, // The number of basic blocks that we have - BlockCounts** pBlockCounts) -{ - mc->cr->AddCall("allocMethodBlockCounts"); - HRESULT result = original_ICorJitInfo->allocMethodBlockCounts(count, pBlockCounts); - mc->recAllocMethodBlockCounts(count, pBlockCounts, result); +HRESULT interceptor_ICJI::allocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema* pSchema, + UINT32 countSchemaItems, + BYTE** pInstrumentationData) +{ + mc->cr->AddCall("allocPgoInstrumentationBySchema"); + HRESULT result = original_ICorJitInfo->allocPgoInstrumentationBySchema(ftnHnd, pSchema, countSchemaItems, pInstrumentationData); + mc->recAllocPgoInstrumentationBySchema(ftnHnd, pSchema, countSchemaItems, pInstrumentationData, result); return result; } // get profile information to be used for optimizing the current method. The format // of the buffer is the same as the format the JIT passes to allocMethodBlockCounts. 
-HRESULT interceptor_ICJI::getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, // The number of basic blocks that we have - BlockCounts** pBlockCounts, - UINT32 * pNumRuns) -{ - mc->cr->AddCall("getMethodBlockCounts"); - HRESULT temp = original_ICorJitInfo->getMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns); - mc->recGetMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns, temp); +HRESULT interceptor_ICJI::getPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema **pSchema, // pointer to the schema table which describes the instrumentation results (pointer will not remain valid after jit completes) + UINT32 * pCountSchemaItems, // pointer to the count schema items + BYTE ** pInstrumentationData) // pointer to the actual instrumentation data (pointer will not remain valid after jit completes) +{ + mc->cr->AddCall("getPgoInstrumentationResults"); + HRESULT temp = original_ICorJitInfo->getPgoInstrumentationResults(ftnHnd, pSchema, pCountSchemaItems, pInstrumentationData); + mc->recGetPgoInstrumentationResults(ftnHnd, pSchema, pCountSchemaItems, pInstrumentationData, temp); return temp; } diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp index 359a4bf55523c..6945b6d74d38d 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp @@ -1318,22 +1318,24 @@ void interceptor_ICJI::reportFatalError( original_ICorJitInfo->reportFatalError(result); } -HRESULT interceptor_ICJI::allocMethodBlockCounts( - UINT32 count, - ICorJitInfo::BlockCounts** pBlockCounts) +HRESULT interceptor_ICJI::getPgoInstrumentationResults( + CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema** pSchema, + UINT32* pCountSchemaItems, + BYTE** pInstrumentationData) { - mcs->AddCall("allocMethodBlockCounts"); - return 
original_ICorJitInfo->allocMethodBlockCounts(count, pBlockCounts); + mcs->AddCall("getPgoInstrumentationResults"); + return original_ICorJitInfo->getPgoInstrumentationResults(ftnHnd, pSchema, pCountSchemaItems, pInstrumentationData); } -HRESULT interceptor_ICJI::getMethodBlockCounts( +HRESULT interceptor_ICJI::allocPgoInstrumentationBySchema( CORINFO_METHOD_HANDLE ftnHnd, - UINT32* pCount, - ICorJitInfo::BlockCounts** pBlockCounts, - UINT32* pNumRuns) + PgoInstrumentationSchema* pSchema, + UINT32 countSchemaItems, + BYTE** pInstrumentationData) { - mcs->AddCall("getMethodBlockCounts"); - return original_ICorJitInfo->getMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns); + mcs->AddCall("allocPgoInstrumentationBySchema"); + return original_ICorJitInfo->allocPgoInstrumentationBySchema(ftnHnd, pSchema, countSchemaItems, pInstrumentationData); } CORINFO_CLASS_HANDLE interceptor_ICJI::getLikelyClass( diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp index 22c019ab80c4e..1d73a9759c600 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp @@ -1155,20 +1155,22 @@ void interceptor_ICJI::reportFatalError( original_ICorJitInfo->reportFatalError(result); } -HRESULT interceptor_ICJI::allocMethodBlockCounts( - UINT32 count, - ICorJitInfo::BlockCounts** pBlockCounts) +HRESULT interceptor_ICJI::getPgoInstrumentationResults( + CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema** pSchema, + UINT32* pCountSchemaItems, + BYTE** pInstrumentationData) { - return original_ICorJitInfo->allocMethodBlockCounts(count, pBlockCounts); + return original_ICorJitInfo->getPgoInstrumentationResults(ftnHnd, pSchema, pCountSchemaItems, pInstrumentationData); } -HRESULT interceptor_ICJI::getMethodBlockCounts( +HRESULT interceptor_ICJI::allocPgoInstrumentationBySchema( CORINFO_METHOD_HANDLE ftnHnd, - 
UINT32* pCount, - ICorJitInfo::BlockCounts** pBlockCounts, - UINT32* pNumRuns) + PgoInstrumentationSchema* pSchema, + UINT32 countSchemaItems, + BYTE** pInstrumentationData) { - return original_ICorJitInfo->getMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns); + return original_ICorJitInfo->allocPgoInstrumentationBySchema(ftnHnd, pSchema, countSchemaItems, pInstrumentationData); } CORINFO_CLASS_HANDLE interceptor_ICJI::getLikelyClass( diff --git a/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp index 691f9973ce262..5e85ded0486d8 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp @@ -1800,22 +1800,24 @@ void MyICJI::reportFatalError(CorJitResult result) // allocate a basic block profile buffer where execution counts will be stored // for jitted basic blocks. -HRESULT MyICJI::allocMethodBlockCounts(UINT32 count, // The number of basic blocks that we have - BlockCounts** pBlockCounts) +HRESULT MyICJI::allocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema* pSchema, + UINT32 countSchemaItems, + BYTE** pInstrumentationData) { - jitInstance->mc->cr->AddCall("allocMethodBlockCounts"); - return jitInstance->mc->repAllocMethodBlockCounts(count, pBlockCounts); + jitInstance->mc->cr->AddCall("allocPgoInstrumentationBySchema"); + return jitInstance->mc->repAllocPgoInstrumentationBySchema(ftnHnd, pSchema, countSchemaItems, pInstrumentationData); } // get profile information to be used for optimizing the current method. The format // of the buffer is the same as the format the JIT passes to allocMethodBlockCounts. 
-HRESULT MyICJI::getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, // The number of basic blocks that we have - BlockCounts** pBlockCounts, - UINT32 * pNumRuns) +HRESULT MyICJI::getPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema **pSchema, // pointer to the schema table which describes the instrumentation results (pointer will not remain valid after jit completes) + UINT32 * pCountSchemaItems, // pointer to the count schema items + BYTE ** pInstrumentationData) // pointer to the actual instrumentation data (pointer will not remain valid after jit completes) { - jitInstance->mc->cr->AddCall("getMethodBlockCounts"); - return jitInstance->mc->repGetMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns); + jitInstance->mc->cr->AddCall("getPgoInstrumentationResults"); + return jitInstance->mc->repGetPgoInstrumentationResults(ftnHnd, pSchema, pCountSchemaItems, pInstrumentationData); } // Get the likely implementing class for a virtual call or interface call made by ftnHnd diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index 7b746c6628949..76fadc317d945 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -572,3 +572,7 @@ End Crst NativeImageLoad End + +Crst PgoData + AcquiredBefore LoaderHeap +End \ No newline at end of file diff --git a/src/coreclr/inc/corjit.h b/src/coreclr/inc/corjit.h index f23f1407870fc..19d6b3ab028c2 100644 --- a/src/coreclr/inc/corjit.h +++ b/src/coreclr/inc/corjit.h @@ -253,9 +253,7 @@ class ICorJitInfo : public ICorDynamicInfo // Data structure for a single class probe. // - // ILOffset is the IL offset in the method for the call site being probed. - // Currently it must be ORed with CLASS_FLAG and (for interface calls) - // INTERFACE_FLAG. + // CLASS_FLAG and INTERFACE_FLAG are placed into the Other field in the schema // // Count is the number of times a call was made at that call site. 
// @@ -279,20 +277,69 @@ class ICorJitInfo : public ICorDynamicInfo CORINFO_CLASS_HANDLE ClassTable[SIZE]; }; - // allocate a basic block profile buffer where execution counts will be stored - // for jitted basic blocks. - virtual HRESULT allocMethodBlockCounts ( - UINT32 count, // The number of basic blocks that we have - BlockCounts ** pBlockCounts // pointer to array of tuples + enum class PgoInstrumentationKind + { + // Schema data types + None = 0, + FourByte = 1, + EightByte = 2, + TypeHandle = 3, + + // Mask of all schema data types + MarshalMask = 0xF, + + // ExcessAlignment + Align4Byte = 0x10, + Align8Byte = 0x20, + AlignPointer = 0x30, + + // Mask of all schema data types + AlignMask = 0x30, + + DescriptorMin = 0x40, + + Done = None, // All instrumentation schemas must end with a record which is "Done" + BasicBlockIntCount = DescriptorMin | FourByte, // 4 byte basic block counter, using unsigned 4 byte int + TypeHandleHistogramCount = (DescriptorMin * 1) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram + TypeHandleHistogramTypeHandle = (DescriptorMin * 1) | TypeHandle, // TypeHandle that is part of a type histogram + Version = (DescriptorMin * 2) | None, // Version is encoded in the Other field of the schema + NumRuns = (DescriptorMin * 3) | None, // Number of runs is encoded in the Other field of the schema + }; + + struct PgoInstrumentationSchema + { + size_t Offset; + PgoInstrumentationKind InstrumentationKind; + int32_t ILOffset; + int32_t Count; + int32_t Other; + }; + + // get profile information to be used for optimizing a current method. The format + // of the buffer is the same as the format the JIT passes to allocPgoInstrumentationBySchema. 
+ virtual HRESULT getPgoInstrumentationResults( + CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema **pSchema, // pointer to the schema table which describes the instrumentation results (pointer will not remain valid after jit completes) + UINT32 * pCountSchemaItems, // pointer to the count schema items + BYTE ** pInstrumentationData // pointer to the actual instrumentation data (pointer will not remain valid after jit completes) ) = 0; - // get profile information to be used for optimizing the current method. The format - // of the buffer is the same as the format the JIT passes to allocBBProfileBuffer. - virtual HRESULT getMethodBlockCounts( - CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, // pointer to the count of tuples - BlockCounts ** pBlockCounts, // pointer to array of tuples - UINT32 * pNumRuns // pointer to the total number of profile scenarios run + // Allocate a profile buffer for use in the current process + // The JIT shall call this api with the schema entries other than Offset filled in. + // The VM is responsible for allocating the buffer, and computing the various offsets + // The offset calculation shall obey the following rules + // 1. All data fields shall be naturally aligned. + // 2. The first offset may be arbitrarily large. + // 3. The JIT may mark a schema item with an alignment flag. This may be used to increase the alignment of a field. + // 4. Each data entry shall be laid out without extra padding. 
+ // + // The intention here is that it becomes possible to describe a C data structure with the alignment for ease of use with + // instrumentation helper functions + virtual HRESULT allocPgoInstrumentationBySchema ( + CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema *pSchema, // pointer to the schema table which describes the instrumentation results + UINT32 countSchemaItems, // pointer to the count schema items + BYTE ** pInstrumentationData // pointer to the actual instrumentation data ) = 0; // Get the likely implementing class for a virtual call or interface call made by ftnHnd diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h index 31893c360d712..316c7fd10a19f 100644 --- a/src/coreclr/inc/crsttypes.h +++ b/src/coreclr/inc/crsttypes.h @@ -93,46 +93,47 @@ enum CrstType CrstObjectList = 75, CrstPEImage = 76, CrstPendingTypeLoadEntry = 77, - CrstPinnedByrefValidation = 78, - CrstProfilerGCRefDataFreeList = 79, - CrstProfilingAPIStatus = 80, - CrstRCWCache = 81, - CrstRCWCleanupList = 82, - CrstReadyToRunEntryPointToMethodDescMap = 83, - CrstReflection = 84, - CrstReJITGlobalRequest = 85, - CrstRetThunkCache = 86, - CrstSavedExceptionInfo = 87, - CrstSaveModuleProfileData = 88, - CrstSecurityStackwalkCache = 89, - CrstSigConvert = 90, - CrstSingleUseLock = 91, - CrstSpecialStatics = 92, - CrstStackSampler = 93, - CrstStressLog = 94, - CrstStubCache = 95, - CrstStubDispatchCache = 96, - CrstStubUnwindInfoHeapSegments = 97, - CrstSyncBlockCache = 98, - CrstSyncHashLock = 99, - CrstSystemBaseDomain = 100, - CrstSystemDomain = 101, - CrstSystemDomainDelayedUnloadList = 102, - CrstThreadIdDispenser = 103, - CrstThreadpoolTimerQueue = 104, - CrstThreadpoolWaitThreads = 105, - CrstThreadpoolWorker = 106, - CrstThreadStore = 107, - CrstTieredCompilation = 108, - CrstTypeEquivalenceMap = 109, - CrstTypeIDMap = 110, - CrstUMEntryThunkCache = 111, - CrstUniqueStack = 112, - CrstUnresolvedClassLock = 113, - CrstUnwindInfoTableLock = 114, - 
CrstVSDIndirectionCellLock = 115, - CrstWrapperTemplate = 116, - kNumberOfCrstTypes = 117 + CrstPgoData = 78, + CrstPinnedByrefValidation = 79, + CrstProfilerGCRefDataFreeList = 80, + CrstProfilingAPIStatus = 81, + CrstRCWCache = 82, + CrstRCWCleanupList = 83, + CrstReadyToRunEntryPointToMethodDescMap = 84, + CrstReflection = 85, + CrstReJITGlobalRequest = 86, + CrstRetThunkCache = 87, + CrstSavedExceptionInfo = 88, + CrstSaveModuleProfileData = 89, + CrstSecurityStackwalkCache = 90, + CrstSigConvert = 91, + CrstSingleUseLock = 92, + CrstSpecialStatics = 93, + CrstStackSampler = 94, + CrstStressLog = 95, + CrstStubCache = 96, + CrstStubDispatchCache = 97, + CrstStubUnwindInfoHeapSegments = 98, + CrstSyncBlockCache = 99, + CrstSyncHashLock = 100, + CrstSystemBaseDomain = 101, + CrstSystemDomain = 102, + CrstSystemDomainDelayedUnloadList = 103, + CrstThreadIdDispenser = 104, + CrstThreadpoolTimerQueue = 105, + CrstThreadpoolWaitThreads = 106, + CrstThreadpoolWorker = 107, + CrstThreadStore = 108, + CrstTieredCompilation = 109, + CrstTypeEquivalenceMap = 110, + CrstTypeIDMap = 111, + CrstUMEntryThunkCache = 112, + CrstUniqueStack = 113, + CrstUnresolvedClassLock = 114, + CrstUnwindInfoTableLock = 115, + CrstVSDIndirectionCellLock = 116, + CrstWrapperTemplate = 117, + kNumberOfCrstTypes = 118 }; #endif // __CRST_TYPES_INCLUDED @@ -221,6 +222,7 @@ int g_rgCrstLevelMap[] = 2, // CrstObjectList 4, // CrstPEImage 19, // CrstPendingTypeLoadEntry + 3, // CrstPgoData 0, // CrstPinnedByrefValidation 0, // CrstProfilerGCRefDataFreeList 0, // CrstProfilingAPIStatus @@ -343,6 +345,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstObjectList", "CrstPEImage", "CrstPendingTypeLoadEntry", + "CrstPgoData", "CrstPinnedByrefValidation", "CrstProfilerGCRefDataFreeList", "CrstProfilingAPIStatus", diff --git a/src/coreclr/inc/eventtracebase.h b/src/coreclr/inc/eventtracebase.h index dc7b0397ef49e..38909a0bc8229 100644 --- a/src/coreclr/inc/eventtracebase.h +++ b/src/coreclr/inc/eventtracebase.h @@ 
-929,6 +929,7 @@ namespace ETW static VOID MethodRestored(MethodDesc * pMethodDesc); static VOID MethodTableRestored(MethodTable * pMethodTable); static VOID DynamicMethodDestroyed(MethodDesc *pMethodDesc); + static VOID LogMethodInstrumentationData(MethodDesc* method, uint32_t cbData, BYTE *data); #else // FEATURE_EVENT_TRACE public: static VOID GetR2RGetEntryPointStart(MethodDesc *pMethodDesc) {}; @@ -940,6 +941,7 @@ namespace ETW static VOID MethodRestored(MethodDesc * pMethodDesc) {}; static VOID MethodTableRestored(MethodTable * pMethodTable) {}; static VOID DynamicMethodDestroyed(MethodDesc *pMethodDesc) {}; + static VOID LogMethodInstrumentationData(MethodDesc* method, uint32_t cbData, BYTE *data) {}; #endif // FEATURE_EVENT_TRACE }; diff --git a/src/coreclr/inc/icorjitinfoimpl_generated.h b/src/coreclr/inc/icorjitinfoimpl_generated.h index 7a07ae42f6f66..20219a094d659 100644 --- a/src/coreclr/inc/icorjitinfoimpl_generated.h +++ b/src/coreclr/inc/icorjitinfoimpl_generated.h @@ -675,15 +675,17 @@ int doAssert( void reportFatalError( CorJitResult result) override; -HRESULT allocMethodBlockCounts( - UINT32 count, - ICorJitInfo::BlockCounts** pBlockCounts) override; +HRESULT getPgoInstrumentationResults( + CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema** pSchema, + UINT32* pCountSchemaItems, + BYTE** pInstrumentationData) override; -HRESULT getMethodBlockCounts( +HRESULT allocPgoInstrumentationBySchema( CORINFO_METHOD_HANDLE ftnHnd, - UINT32* pCount, - ICorJitInfo::BlockCounts** pBlockCounts, - UINT32* pNumRuns) override; + PgoInstrumentationSchema* pSchema, + UINT32 countSchemaItems, + BYTE** pInstrumentationData) override; CORINFO_CLASS_HANDLE getLikelyClass( CORINFO_METHOD_HANDLE ftnHnd, diff --git a/src/coreclr/inc/opcode.def b/src/coreclr/inc/opcode.def index 09e969f59fe4d..652b959203e8e 100644 --- a/src/coreclr/inc/opcode.def +++ b/src/coreclr/inc/opcode.def @@ -328,10 +328,11 @@ OPDEF(CEE_UNUSED55, "unused", Pop0, Pu OPDEF(CEE_UNUSED70,
"unused", Pop0, Push0, InlineNone, IPrimitive, 2, 0xFE, 0x22, NEXT) // These are not real opcodes, but they are handy internally in the EE - +#ifndef OPDEF_REAL_OPCODES_ONLY OPDEF(CEE_ILLEGAL, "illegal", Pop0, Push0, InlineNone, IInternal, 0, MOOT, MOOT, META) OPDEF(CEE_MACRO_END, "endmac", Pop0, Push0, InlineNone, IInternal, 0, MOOT, MOOT, META) OPDEF(CEE_CODE_LABEL, "codelabel", Pop0, Push0, InlineNone, IInternal, 0, MOOT, MOOT, META) +#endif // OPDEF_REAL_OPCODES_ONLY #ifndef OPALIAS #define _OPALIAS_DEFINED_ diff --git a/src/coreclr/inc/pgo_formatprocessing.h b/src/coreclr/inc/pgo_formatprocessing.h new file mode 100644 index 0000000000000..87780a89f13fa --- /dev/null +++ b/src/coreclr/inc/pgo_formatprocessing.h @@ -0,0 +1,395 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// This header defines the algorithms for generating and parsing pgo compressed schema formats + +#ifndef PGO_FORMATPROCESSING_H +#define PGO_FORMATPROCESSING_H + +#ifdef FEATURE_PGO + +#define DEFAULT_UNKNOWN_TYPEHANDLE 1 +#define UNKNOWN_TYPEHANDLE_MIN 1 +#define UNKNOWN_TYPEHANDLE_MAX 32 + +// Records typeHandle in the bit mask *unknownTypeHandleMask (bit index = typeHandle - UNKNOWN_TYPEHANDLE_MIN). +// Returns true when that bit was not set before the call. +inline bool AddTypeHandleToUnknownTypeHandleMask(INT_PTR typeHandle, uint32_t *unknownTypeHandleMask) +{ + // Shift an unsigned one so the top bit (typeHandle == UNKNOWN_TYPEHANDLE_MAX) is produced without going through a signed int + uint32_t bitMask = ((uint32_t)1) << (typeHandle - UNKNOWN_TYPEHANDLE_MIN); + bool result = (bitMask & *unknownTypeHandleMask) == 0; + *unknownTypeHandleMask |= bitMask; + return result; +} + +// True when typeHandle lies in the range [UNKNOWN_TYPEHANDLE_MIN, UNKNOWN_TYPEHANDLE_MAX]. +inline bool IsUnknownTypeHandle(INT_PTR typeHandle) +{ + return ((typeHandle >= UNKNOWN_TYPEHANDLE_MIN) && (typeHandle <= UNKNOWN_TYPEHANDLE_MAX)); +} + +inline INT_PTR HashToPgoUnknownTypeHandle(uint32_t hash) +{ + // Map from a 32bit hash to the 32 different unknown type handle values + return (hash & 0x1F) + 1; +} + +// Bitwise OR of two PgoInstrumentationKind values, computed on their integer representation. +inline ICorJitInfo::PgoInstrumentationKind operator|(ICorJitInfo::PgoInstrumentationKind a, ICorJitInfo::PgoInstrumentationKind b) +{ + return static_cast<ICorJitInfo::PgoInstrumentationKind>(static_cast<int>(a) | static_cast<int>(b)); +} +
+// Bitwise AND of two PgoInstrumentationKind values, computed on their integer representation. +inline ICorJitInfo::PgoInstrumentationKind operator&(ICorJitInfo::PgoInstrumentationKind a, ICorJitInfo::PgoInstrumentationKind b) +{ + return static_cast<ICorJitInfo::PgoInstrumentationKind>(static_cast<int>(a) & static_cast<int>(b)); +} + +// Integer difference of two PgoInstrumentationKind values. +inline ICorJitInfo::PgoInstrumentationKind operator-(ICorJitInfo::PgoInstrumentationKind a, ICorJitInfo::PgoInstrumentationKind b) +{ + return static_cast<ICorJitInfo::PgoInstrumentationKind>(static_cast<int>(a) - static_cast<int>(b)); +} + +// Bitwise complement of a PgoInstrumentationKind value. +inline ICorJitInfo::PgoInstrumentationKind operator~(ICorJitInfo::PgoInstrumentationKind a) +{ + return static_cast<ICorJitInfo::PgoInstrumentationKind>(~static_cast<int>(a)); +} + +// Decodes the compressed integers held in the cbDataMax bytes at pByte and passes each decoded int32_t to intProcessor. +// Returns false if a multi-byte value is cut off by the end of the buffer or if intProcessor returns false; +// returns true once every byte has been consumed. +template<class IntHandler> +bool ReadCompressedInts(const uint8_t *pByte, size_t cbDataMax, IntHandler intProcessor) +{ + while (cbDataMax > 0) + { + // This logic is a variant on CorSigUncompressSignedInt which allows for the full range of an int32_t + int32_t signedInt; + if ((*pByte & 0x80) == 0x0) // 0??? ???? + { + signedInt = *pByte >> 1; + if (*pByte & 1) + signedInt |= SIGN_MASK_ONEBYTE; + + pByte += 1; + cbDataMax -=1; + } + else if ((*pByte & 0xC0) == 0x80) // 10?? ???? + { + if (cbDataMax < 2) + return false; + + int shiftedInt = ((*pByte & 0x3f) << 8) | *(pByte + 1); + signedInt = shiftedInt >> 1; + if (shiftedInt & 1) + signedInt |= SIGN_MASK_TWOBYTE; + + pByte += 2; + cbDataMax -= 2; + } + else + { + if (cbDataMax < 5) + return false; + + // The header byte is skipped; the next four bytes hold the value, most significant byte first. + // Assemble it in unsigned arithmetic so a set top bit is never shifted through a signed int. + signedInt = (int32_t)(((uint32_t)*(pByte + 1) << 24) | ((uint32_t)*(pByte+2) << 16) | ((uint32_t)*(pByte+3) << 8) | (uint32_t)*(pByte+4)); + + pByte += 5; + cbDataMax -= 5; + } + + if (!intProcessor(signedInt)) + { + return false; + } + } + + return true; +} + +// State of the schema decoder: the low four bits name the schema fields whose delta is still to be read, +// UpdateProcessMaskFlag means the next integer is the field mask of a new record. +enum class InstrumentationDataProcessingState +{ + Done = 0, + ILOffset = 0x1, + Type = 0x2, + Count = 0x4, + Other = 0x8, + UpdateProcessMask = 0xF, + UpdateProcessMaskFlag = 0x100, +}; + +inline InstrumentationDataProcessingState operator|(InstrumentationDataProcessingState a, InstrumentationDataProcessingState b) +{ + return static_cast<InstrumentationDataProcessingState>(static_cast<int>(a) | static_cast<int>(b)); +} + +inline InstrumentationDataProcessingState operator&(InstrumentationDataProcessingState a,
InstrumentationDataProcessingState b) +{ + return static_cast<InstrumentationDataProcessingState>(static_cast<int>(a) & static_cast<int>(b)); +} + +inline InstrumentationDataProcessingState operator~(InstrumentationDataProcessingState a) +{ + return static_cast<InstrumentationDataProcessingState>(~static_cast<int>(a)); +} + +// Decodes the delta-compressed schema stream held in the cbDataMax bytes at pByte. +// Each record is a field mask followed by one delta per masked field, read in the order ILOffset, Type, Count, Other; +// the deltas are accumulated into a running schema entry which is passed to handler once the record is complete. +// Returns true only when a record whose InstrumentationKind is Done was reached; running out of data first, +// or handler returning false, yields false. +template<class SchemaHandler> +bool ReadInstrumentationData(const uint8_t *pByte, size_t cbDataMax, SchemaHandler handler) +{ + ICorJitInfo::PgoInstrumentationSchema curSchema; + InstrumentationDataProcessingState processingState; + bool done = false; + + memset(&curSchema, 0, sizeof(curSchema)); + processingState = InstrumentationDataProcessingState::UpdateProcessMaskFlag; + // The result of ReadCompressedInts is deliberately not used: success is reported through 'done' + ReadCompressedInts(pByte, cbDataMax, [&curSchema, handler, &processingState, &done](int32_t curValue) + { + if (processingState == InstrumentationDataProcessingState::UpdateProcessMaskFlag) + { + processingState = (InstrumentationDataProcessingState)curValue; + return true; + } + + if ((processingState & InstrumentationDataProcessingState::ILOffset) == InstrumentationDataProcessingState::ILOffset) + { + curSchema.ILOffset += curValue; + processingState = processingState & ~InstrumentationDataProcessingState::ILOffset; + } + else if ((processingState & InstrumentationDataProcessingState::Type) == InstrumentationDataProcessingState::Type) + { + curSchema.InstrumentationKind = static_cast<ICorJitInfo::PgoInstrumentationKind>(static_cast<int>(curSchema.InstrumentationKind) + curValue); + processingState = processingState & ~InstrumentationDataProcessingState::Type; + } + else if ((processingState & InstrumentationDataProcessingState::Count) == InstrumentationDataProcessingState::Count) + { + curSchema.Count += curValue; + processingState = processingState & ~InstrumentationDataProcessingState::Count; + } + else if ((processingState & InstrumentationDataProcessingState::Other) == InstrumentationDataProcessingState::Other) + { + curSchema.Other += curValue; + processingState = processingState & ~InstrumentationDataProcessingState::Other; + } + + if (processingState == InstrumentationDataProcessingState::Done) + { +
processingState = InstrumentationDataProcessingState::UpdateProcessMaskFlag; + if (curSchema.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::Done) + { + done = true; + return false; + } + + if (!handler(curSchema)) + { + return false; + } + + } + return true; + }); + + return done; +} + +// Stores in *pInstrumentationSchemaCount the number of schema entries decoded from pByte (the Done terminator is not counted). +// Returns the result of ReadInstrumentationData. +inline bool CountInstrumentationDataSize(const uint8_t *pByte, size_t cbDataMax, int32_t *pInstrumentationSchemaCount) +{ + *pInstrumentationSchemaCount = 0; + return ReadInstrumentationData(pByte, cbDataMax, [pInstrumentationSchemaCount](const ICorJitInfo::PgoInstrumentationSchema& schema) { (*pInstrumentationSchemaCount)++; return true; }); +} + +// Returns true only when the encoded schema at pByte decodes, entry for entry, to exactly the cSchemas entries of schemaTable. +// InstrumentationKind, ILOffset, Count and Other are compared; Offset is not. +inline bool ComparePgoSchemaEquals(const uint8_t *pByte, size_t cbDataMax, const ICorJitInfo::PgoInstrumentationSchema* schemaTable, size_t cSchemas) +{ + size_t iSchema = 0; + bool readToEnd = ReadInstrumentationData(pByte, cbDataMax, [schemaTable, cSchemas, &iSchema](const ICorJitInfo::PgoInstrumentationSchema& schema) + { + if (iSchema >= cSchemas) + return false; + + if (schema.InstrumentationKind != schemaTable[iSchema].InstrumentationKind) + return false; + + if (schema.ILOffset != schemaTable[iSchema].ILOffset) + return false; + + if (schema.Count != schemaTable[iSchema].Count) + return false; + + if (schema.Other != schemaTable[iSchema].Other) + return false; + + // Advance to the next table entry; without this every decoded entry is compared against schemaTable[0] + iSchema++; + return true; + }); + + // A stream that terminates early matches only a prefix of the table, which is not equality + return readToEnd && (iSchema == cSchemas); +} + +// Size in bytes of one data item of the given kind, selected by its MarshalMask bits. +// Asserts and returns 0 for a value that is not one of the listed cases. +inline uint32_t InstrumentationKindToSize(ICorJitInfo::PgoInstrumentationKind kind) +{ + switch(kind & ICorJitInfo::PgoInstrumentationKind::MarshalMask) + { + case ICorJitInfo::PgoInstrumentationKind::None: + return 0; + case ICorJitInfo::PgoInstrumentationKind::FourByte: + return 4; + case ICorJitInfo::PgoInstrumentationKind::EightByte: + return 8; + case ICorJitInfo::PgoInstrumentationKind::TypeHandle: + return TARGET_POINTER_SIZE; + default: + _ASSERTE(FALSE); + return 0; + } +} + +inline UINT InstrumentationKindToAlignment(ICorJitInfo::PgoInstrumentationKind kind) +{ + switch(kind & ICorJitInfo::PgoInstrumentationKind::AlignMask) + { + case
ICorJitInfo::PgoInstrumentationKind::Align4Byte: + return 4; + case ICorJitInfo::PgoInstrumentationKind::Align8Byte: + return 8; + case ICorJitInfo::PgoInstrumentationKind::AlignPointer: + return TARGET_POINTER_SIZE; + default: + return (UINT)InstrumentationKindToSize(kind); + } +} + +// Computes currentSchema->Offset from the entry laid out immediately before it: +// the end of prevSchema's data (Offset + item size * Count) rounded up to the alignment of the current kind. +// An entry whose kind has size 0 simply takes prevSchema.Offset. +// NOTE(review): once such a zero-sized entry becomes prevSchema, the size of the entry before it is no longer +// added for the next entry - confirm zero-sized kinds only appear where that cannot cause overlapping offsets. +inline void LayoutPgoInstrumentationSchema(const ICorJitInfo::PgoInstrumentationSchema& prevSchema, ICorJitInfo::PgoInstrumentationSchema* currentSchema) +{ + size_t instrumentationSize = InstrumentationKindToSize(currentSchema->InstrumentationKind); + if (instrumentationSize != 0) + { + currentSchema->Offset = (UINT)AlignUp((size_t)prevSchema.Offset + (size_t)InstrumentationKindToSize(prevSchema.InstrumentationKind) * prevSchema.Count, + InstrumentationKindToAlignment(currentSchema->InstrumentationKind)); + } + else + { + currentSchema->Offset = prevSchema.Offset; + } +} + +// Same as ReadInstrumentationData, but fills in the Offset field of every entry before passing it to handler. +// Layout starts at initialOffset and each entry is placed relative to the one before it. +template<class SchemaHandler> +bool ReadInstrumentationDataWithLayout(const uint8_t *pByte, size_t cbDataMax, size_t initialOffset, SchemaHandler handler) +{ + ICorJitInfo::PgoInstrumentationSchema prevSchema; + memset(&prevSchema, 0, sizeof(ICorJitInfo::PgoInstrumentationSchema)); + prevSchema.Offset = initialOffset; + + return ReadInstrumentationData(pByte, cbDataMax, [&prevSchema, handler](ICorJitInfo::PgoInstrumentationSchema curSchema) + { + LayoutPgoInstrumentationSchema(prevSchema, &curSchema); + if (!handler(curSchema)) + return false; + prevSchema = curSchema; + return true; + }); +} + +// Decodes the schema stream with layout and appends every entry to *pSchemas. +inline bool ReadInstrumentationDataWithLayoutIntoSArray(const uint8_t *pByte, size_t cbDataMax, size_t initialOffset, SArray<ICorJitInfo::PgoInstrumentationSchema>* pSchemas) +{ + return ReadInstrumentationDataWithLayout(pByte, cbDataMax, initialOffset, [pSchemas](const ICorJitInfo::PgoInstrumentationSchema &schema) + { + pSchemas->Append(schema); + return true; + }); +} + + +// Encodes value in compressed form and emits it through byteWriter, which is called once per output byte +// and returns false to signal failure. +template<class ByteWriter> +bool WriteCompressedIntToBytes(int32_t value, ByteWriter& byteWriter) +{ + uint8_t isSigned = 0; + + // This function is modeled on CorSigCompressSignedInt, but differs in that + // it handles
arbitrary int32 values, not just a subset + if (value < 0) + isSigned = 1; + + if ((value & SIGN_MASK_ONEBYTE) == 0 || (value & SIGN_MASK_ONEBYTE) == SIGN_MASK_ONEBYTE) + { + return byteWriter((uint8_t)((value & ~SIGN_MASK_ONEBYTE) << 1 | isSigned)); + } + else if ((value & SIGN_MASK_TWOBYTE) == 0 || (value & SIGN_MASK_TWOBYTE) == SIGN_MASK_TWOBYTE) + { + int32_t iData = (int32_t)((value & ~SIGN_MASK_TWOBYTE) << 1 | isSigned); + _ASSERTE(iData <= 0x3fff); + // Stop at the first byte the writer rejects instead of pushing further bytes after a failure + return byteWriter(uint8_t((iData >> 8) | 0x80)) && + byteWriter(uint8_t(iData & 0xff)); + } + else + { + // Unlike CorSigCompressSignedInt, this just writes a header byte (0xC0) + // then a full 4 bytes, ignoring the whole signed bit detail + return byteWriter(uint8_t(0xC0)) && + byteWriter(uint8_t((value >> 24) & 0xff)) && + byteWriter(uint8_t((value >> 16) & 0xff)) && + byteWriter(uint8_t((value >> 8) & 0xff)) && + byteWriter(uint8_t((value >> 0) & 0xff)); + } +} + +// Writes one schema record as a delta against prevSchema: first a mask naming the fields that changed, +// then the change of each such field in the order ILOffset, Type, Count, Other. +// curSchema must differ from prevSchema in at least one of those fields (asserted below). +// Returns false as soon as byteWriter rejects a byte. +template<class ByteWriter> +bool WriteIndividualSchemaToBytes(ICorJitInfo::PgoInstrumentationSchema prevSchema, ICorJitInfo::PgoInstrumentationSchema curSchema, ByteWriter& byteWriter) +{ + int32_t ilOffsetDiff = curSchema.ILOffset - prevSchema.ILOffset; + int32_t OtherDiff = curSchema.Other - prevSchema.Other; + int32_t CountDiff = curSchema.Count - prevSchema.Count; + int32_t TypeDiff = (int32_t)curSchema.InstrumentationKind - (int32_t)prevSchema.InstrumentationKind; + + InstrumentationDataProcessingState modifyMask = (InstrumentationDataProcessingState)0; + + if (ilOffsetDiff != 0) + modifyMask = modifyMask | InstrumentationDataProcessingState::ILOffset; + if (TypeDiff != 0) + modifyMask = modifyMask | InstrumentationDataProcessingState::Type; + if (CountDiff != 0) + modifyMask = modifyMask | InstrumentationDataProcessingState::Count; + if (OtherDiff != 0) + modifyMask = modifyMask | InstrumentationDataProcessingState::Other; + + _ASSERTE(modifyMask != InstrumentationDataProcessingState::Done); + + // The mask write can fail like any other write; report it rather than dropping the result + if (!WriteCompressedIntToBytes((int32_t)modifyMask, byteWriter)) + return false; + if ((ilOffsetDiff != 0) &&
!WriteCompressedIntToBytes(ilOffsetDiff, byteWriter)) + return false; + if ((TypeDiff != 0) && !WriteCompressedIntToBytes(TypeDiff, byteWriter)) + return false; + if ((CountDiff != 0) && !WriteCompressedIntToBytes(CountDiff, byteWriter)) + return false; + if ((OtherDiff != 0) && !WriteCompressedIntToBytes(OtherDiff, byteWriter)) + return false; + + return true; +} + +// Encodes the cSchemas entries of schemaTable, each as a delta against the previous entry (the first against an all-zero entry), +// followed by a terminating record whose InstrumentationKind is Done. Every byte is emitted through byteWriter. +// Returns false as soon as writing a record fails. +template<class ByteWriter> +bool WriteInstrumentationToBytes(const ICorJitInfo::PgoInstrumentationSchema* schemaTable, size_t cSchemas, const ByteWriter& byteWriter) +{ + ICorJitInfo::PgoInstrumentationSchema prevSchema; + memset(&prevSchema, 0, sizeof(ICorJitInfo::PgoInstrumentationSchema)); + + for (size_t iSchema = 0; iSchema < cSchemas; iSchema++) + { + if (!WriteIndividualSchemaToBytes(prevSchema, schemaTable[iSchema], byteWriter)) + return false; + prevSchema = schemaTable[iSchema]; + } + + // Terminate the schema list with an entry which is Done + ICorJitInfo::PgoInstrumentationSchema terminationSchema = prevSchema; + terminationSchema.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::Done; + if (!WriteIndividualSchemaToBytes(prevSchema, terminationSchema, byteWriter)) + return false; + + return true; +} + +// Encodes schemaTable into the buffer 'array' of byteCount bytes. +// Returns false when the encoded form does not fit in byteCount bytes. +inline bool WriteInstrumentationSchema(const ICorJitInfo::PgoInstrumentationSchema* schemaTable, size_t cSchemas, uint8_t* array, size_t byteCount) +{ + return WriteInstrumentationToBytes(schemaTable, cSchemas, [&array, &byteCount](uint8_t data) + { + if (byteCount == 0) + return false; + *array = data; + array += 1; + byteCount--; + return true; + }); +} + +#endif // FEATURE_PGO +#endif // PGO_FORMATPROCESSING_H diff --git a/src/coreclr/jit/ICorJitInfo_API_names.h b/src/coreclr/jit/ICorJitInfo_API_names.h index d5a683a12176b..1c74f8b96aed7 100644 --- a/src/coreclr/jit/ICorJitInfo_API_names.h +++ b/src/coreclr/jit/ICorJitInfo_API_names.h @@ -167,8 +167,8 @@ DEF_CLR_API(setEHinfo) DEF_CLR_API(logMsg) DEF_CLR_API(doAssert) DEF_CLR_API(reportFatalError) -DEF_CLR_API(allocMethodBlockCounts)
-DEF_CLR_API(getMethodBlockCounts) +DEF_CLR_API(getPgoInstrumentationResults) +DEF_CLR_API(allocPgoInstrumentationBySchema) DEF_CLR_API(getLikelyClass) DEF_CLR_API(recordCallSite) DEF_CLR_API(recordRelocation) diff --git a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp index 0788bc6af4e3c..42d2b55b7303d 100644 --- a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp +++ b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp @@ -1602,25 +1602,27 @@ void WrapICorJitInfo::reportFatalError( API_LEAVE(reportFatalError); } -HRESULT WrapICorJitInfo::allocMethodBlockCounts( - UINT32 count, - ICorJitInfo::BlockCounts** pBlockCounts) +HRESULT WrapICorJitInfo::getPgoInstrumentationResults( + CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema** pSchema, + UINT32* pCountSchemaItems, + BYTE** pInstrumentationData) { - API_ENTER(allocMethodBlockCounts); - HRESULT temp = wrapHnd->allocMethodBlockCounts(count, pBlockCounts); - API_LEAVE(allocMethodBlockCounts); + API_ENTER(getPgoInstrumentationResults); + HRESULT temp = wrapHnd->getPgoInstrumentationResults(ftnHnd, pSchema, pCountSchemaItems, pInstrumentationData); + API_LEAVE(getPgoInstrumentationResults); return temp; } -HRESULT WrapICorJitInfo::getMethodBlockCounts( +HRESULT WrapICorJitInfo::allocPgoInstrumentationBySchema( CORINFO_METHOD_HANDLE ftnHnd, - UINT32* pCount, - ICorJitInfo::BlockCounts** pBlockCounts, - UINT32* pNumRuns) + PgoInstrumentationSchema* pSchema, + UINT32 countSchemaItems, + BYTE** pInstrumentationData) { - API_ENTER(getMethodBlockCounts); - HRESULT temp = wrapHnd->getMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns); - API_LEAVE(getMethodBlockCounts); + API_ENTER(allocPgoInstrumentationBySchema); + HRESULT temp = wrapHnd->allocPgoInstrumentationBySchema(ftnHnd, pSchema, countSchemaItems, pInstrumentationData); + API_LEAVE(allocPgoInstrumentationBySchema); return temp; } diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 
a479bb88fcba4..2843a57c4adce 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2875,43 +2875,62 @@ void Compiler::compInitOptions(JitFlags* jitFlags) // Profile data // - fgBlockCounts = nullptr; + fgPgoSchema = nullptr; + fgPgoData = nullptr; + fgPgoSchemaCount = 0; fgProfileData_ILSizeMismatch = false; fgNumProfileRuns = 0; if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT)) { HRESULT hr; - hr = info.compCompHnd->getMethodBlockCounts(info.compMethodHnd, &fgBlockCountsCount, &fgBlockCounts, - &fgNumProfileRuns); + hr = info.compCompHnd->getPgoInstrumentationResults(info.compMethodHnd, &fgPgoSchema, &fgPgoSchemaCount, + &fgPgoData); - JITDUMP("BBOPT set -- VM query for profile data for %s returned: hr=%0x; counts at %p, %d blocks, %d runs\n", - info.compFullName, hr, fgBlockCounts, fgBlockCountsCount, fgNumProfileRuns); + if (SUCCEEDED(hr)) + { + fgNumProfileRuns = 0; + for (UINT32 iSchema = 0; iSchema < fgPgoSchemaCount; iSchema++) + { + if (fgPgoSchema[iSchema].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::NumRuns) + { + fgNumProfileRuns += fgPgoSchema[iSchema].Other; + } + } - // a failed result that also has a non-NULL fgBlockCounts + if (fgNumProfileRuns == 0) + fgNumProfileRuns = 1; + } + + JITDUMP("BBOPT set -- VM query for profile data for %s returned: hr=%0x; schema at %p, counts at %p, %d schema " + "elements, %d runs\n", + info.compFullName, hr, fgPgoSchema, fgPgoData, fgPgoSchemaCount, fgNumProfileRuns); + + // a failed result that also has a non-NULL fgPgoSchema // indicates that the ILSize for the method no longer matches // the ILSize for the method when profile data was collected. 
// // We will discard the IBC data in this case // - if (FAILED(hr) && (fgBlockCounts != nullptr)) + if (FAILED(hr) && (fgPgoSchema != nullptr)) { fgProfileData_ILSizeMismatch = true; - fgBlockCounts = nullptr; + fgPgoData = nullptr; + fgPgoSchema = nullptr; } #ifdef DEBUG - // A successful result implies a non-NULL fgBlockCounts + // A successful result implies a non-NULL fgPgoSchema // if (SUCCEEDED(hr)) { - assert(fgBlockCounts != nullptr); + assert(fgPgoSchema != nullptr); } - // A failed result implies a NULL fgBlockCounts + // A failed result implies a NULL fgPgoSchema // see implementation of Compiler::fgHaveProfileData() // if (FAILED(hr)) { - assert(fgBlockCounts == nullptr); + assert(fgPgoSchema == nullptr); } #endif } @@ -5780,8 +5799,19 @@ void Compiler::compCompileFinish() unsigned profCallCount = 0; if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData()) { - assert(fgBlockCounts[0].ILOffset == 0); - profCallCount = fgBlockCounts[0].ExecutionCount; + bool foundEntrypointBasicBlockCount = false; + for (UINT32 iSchema = 0; iSchema < fgPgoSchemaCount; iSchema++) + { + if ((fgPgoSchema[iSchema].InstrumentationKind == + ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) && + (fgPgoSchema[iSchema].ILOffset == 0)) + { + foundEntrypointBasicBlockCount = true; + profCallCount = *(uint32_t*)(fgPgoData + fgPgoSchema[iSchema].Offset); + break; + } + } + assert(foundEntrypointBasicBlockCount); } static bool headerPrinted = false; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8cf029f7d8c7d..68b1d343aa49d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5535,10 +5535,11 @@ class Compiler void fgAdjustForAddressExposedOrWrittenThis(); - bool fgProfileData_ILSizeMismatch; - ICorJitInfo::BlockCounts* fgBlockCounts; - UINT32 fgBlockCountsCount; - UINT32 fgNumProfileRuns; + bool fgProfileData_ILSizeMismatch; + ICorJitInfo::PgoInstrumentationSchema* fgPgoSchema; + BYTE* fgPgoData; + UINT32 
fgPgoSchemaCount; + UINT32 fgNumProfileRuns; unsigned fgStressBBProf() { diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index efb3305a6206a..9314d48f4a063 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -176,9 +176,10 @@ void Compiler::fgInit() fgPreviousCandidateSIMDFieldAsgStmt = nullptr; #endif - fgHasSwitch = false; - fgBlockCounts = nullptr; - + fgHasSwitch = false; + fgPgoSchema = nullptr; + fgPgoData = nullptr; + fgPgoSchemaCount = 0; fgPredListSortVector = nullptr; } @@ -217,7 +218,7 @@ bool Compiler::fgHaveProfileData() return false; } - return (fgBlockCounts != nullptr); + return (fgPgoSchema != nullptr); } //------------------------------------------------------------------------ @@ -370,11 +371,12 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei return false; } - for (UINT32 i = 0; i < fgBlockCountsCount; i++) + for (UINT32 i = 0; i < fgPgoSchemaCount; i++) { - if (fgBlockCounts[i].ILOffset == offset) + if ((fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) && + ((IL_OFFSET)fgPgoSchema[i].ILOffset == offset)) { - *weightWB = (BasicBlock::weight_t)fgBlockCounts[i].ExecutionCount; + *weightWB = (BasicBlock::weight_t) * (uint32_t*)(fgPgoData + fgPgoSchema[i].Offset); return true; } } @@ -383,6 +385,38 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei return true; } +// Pre-order tree walker that calls m_functor(compiler, call) for every call node that is virtual and not CT_INDIRECT. +// The walk always continues; the functor decides what to do with each call. +template <class TFunctor> +class ClassProbeVisitor final : public GenTreeVisitor<ClassProbeVisitor<TFunctor>> +{ +public: + enum + { + DoPreOrder = true + }; + + TFunctor& m_functor; + Compiler* m_compiler; + + ClassProbeVisitor(Compiler* compiler, TFunctor& functor) + : GenTreeVisitor<ClassProbeVisitor>(compiler), m_functor(functor), m_compiler(compiler) + { + } + Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* const node = *use; + if (node->IsCall()) + { + GenTreeCall* const call = node->AsCall(); + if (call->IsVirtual() && (call->gtCallType !=
CT_INDIRECT)) + { + m_functor(m_compiler, call); + } + } + + return Compiler::WALK_CONTINUE; + } +}; + //------------------------------------------------------------------------ // fgInstrumentMethod: add instrumentation probes to the method // @@ -407,6 +441,7 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei void Compiler::fgInstrumentMethod() { noway_assert(!compIsForInlining()); + jitstd::vector<ICorJitInfo::PgoInstrumentationSchema> schema(getAllocator()); // Count the number of basic blocks in the method // that will get block count probes. @@ -415,10 +450,80 @@ void Compiler::fgInstrumentMethod() BasicBlock* block; for (block = fgFirstBB; (block != nullptr); block = block->bbNext) { - if (!(block->bbFlags & BBF_IMPORTED) || (block->bbFlags & BBF_INTERNAL)) + // We don't want to profile any un-imported blocks + // + if ((block->bbFlags & BBF_IMPORTED) == 0) + { + continue; + } + + if ((block->bbFlags & BBF_HAS_CLASS_PROFILE) != 0) + { + class BuildClassProbeSchemaGen + { + jitstd::vector<ICorJitInfo::PgoInstrumentationSchema>* m_schema; + + public: + BuildClassProbeSchemaGen(jitstd::vector<ICorJitInfo::PgoInstrumentationSchema>* schema) + : m_schema(schema) + { + } + void operator()(Compiler* compiler, GenTreeCall* call) + { + ICorJitInfo::PgoInstrumentationSchema schemaElem; + schemaElem.Count = 1; + schemaElem.Other = ICorJitInfo::ClassProfile::CLASS_FLAG; + if (call->IsVirtualStub()) + { + schemaElem.Other |= ICorJitInfo::ClassProfile::INTERFACE_FLAG; + } + else + { + assert(call->IsVirtualVtable()); + } + + schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount; + schemaElem.ILOffset = jitGetILoffs(call->gtClassProfileCandidateInfo->ilOffset); + schemaElem.Offset = 0; + + m_schema->push_back(schemaElem); + + // Re-using ILOffset and Other fields from schema item for TypeHandleHistogramCount + schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramTypeHandle; + schemaElem.Count = ICorJitInfo::ClassProfile::SIZE; + m_schema->push_back(schemaElem); + } + }; + // Scan
the statements and identify the class probes + // + BuildClassProbeSchemaGen schemaGen(&schema); + ClassProbeVisitor visitor(this, schemaGen); + for (Statement* stmt : block->Statements()) + { + visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); + } + } + + if (block->bbFlags & BBF_INTERNAL) { continue; } + + // Assign the current block's IL offset into the profile data + // (make sure IL offset is sane) + // + IL_OFFSET offset = block->bbCodeOffs; + assert((int)offset >= 0); + + ICorJitInfo::PgoInstrumentationSchema schemaElem; + schemaElem.Count = 1; + schemaElem.Other = 0; + schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount; + schemaElem.ILOffset = offset; + schemaElem.Offset = 0; + + schema.push_back(schemaElem); + countOfBlocks++; } @@ -426,6 +531,7 @@ void Compiler::fgInstrumentMethod() // when importing. // int countOfCalls = info.compClassProbeCount; + assert(((countOfCalls * 2) + countOfBlocks) == (int)schema.size()); // Optionally bail out, if there are less than three blocks and no call sites to profile. // One block is common. We don't expect to see zero or two blocks here. @@ -444,16 +550,10 @@ void Compiler::fgInstrumentMethod() // Allocate the profile buffer // - // Allocation is in multiples of ICorJitInfo::BlockCounts. For each profile table we need - // some multiple of these. 
- // - const unsigned entriesPerCall = sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts); - assert(entriesPerCall * sizeof(ICorJitInfo::BlockCounts) == sizeof(ICorJitInfo::ClassProfile)); - - const unsigned totalEntries = countOfBlocks + entriesPerCall * countOfCalls; - ICorJitInfo::BlockCounts* profileBlockCountsStart = nullptr; + BYTE* profileMemory; - HRESULT res = info.compCompHnd->allocMethodBlockCounts(totalEntries, &profileBlockCountsStart); + HRESULT res = info.compCompHnd->allocPgoInstrumentationBySchema(info.compMethodHnd, schema.data(), + (UINT32)schema.size(), &profileMemory); // We may not be able to instrument, if so we'll set this false. // We can't just early exit, because we have to clean up calls that we might have profiled. @@ -472,9 +572,6 @@ void Compiler::fgInstrumentMethod() } } - ICorJitInfo::BlockCounts* profileBlockCountsEnd = &profileBlockCountsStart[countOfBlocks]; - ICorJitInfo::BlockCounts* profileEnd = &profileBlockCountsStart[totalEntries]; - // For each BasicBlock (non-Internal) // 1. Assign the blocks bbCodeOffs to the ILOffset field of this blocks profile data. // 2. Add an operation that increments the ExecutionCount field at the beginning of the block. 
@@ -482,7 +579,10 @@ void Compiler::fgInstrumentMethod() // Each (non-Internal) block has it own BlockCounts tuple [ILOffset, ExecutionCount] // To start we initialize our current one with the first one that we allocated // - ICorJitInfo::BlockCounts* currentBlockCounts = profileBlockCountsStart; + int currentSchemaIndex = 0; + + // Hold the address of the first blocks ExecutionCount + size_t addrOfFirstExecutionCount = 0; for (block = fgFirstBB; (block != nullptr); block = block->bbNext) { @@ -508,130 +608,93 @@ void Compiler::fgInstrumentMethod() // JITDUMP("Scanning for calls to profile in " FMT_BB "\n", block->bbNum); - class ClassProbeVisitor final : public GenTreeVisitor + class ClassProbeInserter { - public: - enum - { - DoPreOrder = true - }; - - int m_count; - ICorJitInfo::ClassProfile* m_tableBase; - bool m_instrument; + jitstd::vector* m_schema; + BYTE* m_profileMemory; + int* m_currentSchemaIndex; + bool m_instrument; - ClassProbeVisitor(Compiler* compiler, ICorJitInfo::ClassProfile* tableBase, bool instrument) - : GenTreeVisitor(compiler) - , m_count(0) - , m_tableBase(tableBase) + public: + int m_count = 0; + + ClassProbeInserter(jitstd::vector* schema, + BYTE* profileMemory, + int* pCurrentSchemaIndex, + bool instrument) + : m_schema(schema) + , m_profileMemory(profileMemory) + , m_currentSchemaIndex(pCurrentSchemaIndex) , m_instrument(instrument) { } - Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + void operator()(Compiler* compiler, GenTreeCall* call) { - GenTree* const node = *use; - if (node->IsCall()) + JITDUMP("Found call [%06u] with probe index %d and ilOffset 0x%X\n", compiler->dspTreeID(call), + call->gtClassProfileCandidateInfo->probeIndex, + call->gtClassProfileCandidateInfo->ilOffset); + + m_count++; + if (m_instrument) { - GenTreeCall* const call = node->AsCall(); - if (call->IsVirtual() && (call->gtCallType != CT_INDIRECT)) - { - JITDUMP("Found call [%06u] with probe index %d and ilOffset 0x%X\n", - 
m_compiler->dspTreeID(call), call->gtClassProfileCandidateInfo->probeIndex, - call->gtClassProfileCandidateInfo->ilOffset); + // We transform the call from (CALLVIRT obj, ... args ...) to + // to + // (CALLVIRT + // (COMMA + // (ASG tmp, obj) + // (COMMA + // (CALL probe_fn tmp, &probeEntry) + // tmp))) + // ... args ...) + // - m_count++; + assert(call->gtCallThisArg->GetNode()->TypeGet() == TYP_REF); - if (m_instrument) - { - // We transform the call from (CALLVIRT obj, ... args ...) to - // to - // (CALLVIRT - // (COMMA - // (ASG tmp, obj) - // (COMMA - // (CALL probe_fn tmp, &probeEntry) - // tmp))) - // ... args ...) - // - - assert(call->gtCallThisArg->GetNode()->TypeGet() == TYP_REF); - - // Figure out where the table is located. - // - ICorJitInfo::ClassProfile* classProfile = - &m_tableBase[call->gtClassProfileCandidateInfo->probeIndex]; - - // Grab a temp to hold the 'this' object as it will be used three times - // - unsigned const tmpNum = m_compiler->lvaGrabTemp(true DEBUGARG("class profile tmp")); - m_compiler->lvaTable[tmpNum].lvType = TYP_REF; - - // Generate the IR... 
- // - GenTree* const classProfileNode = - m_compiler->gtNewIconNode((ssize_t)classProfile, TYP_I_IMPL); - GenTree* const tmpNode = m_compiler->gtNewLclvNode(tmpNum, TYP_REF); - GenTreeCall::Use* const args = m_compiler->gtNewCallArgs(tmpNode, classProfileNode); - GenTree* const helperCallNode = - m_compiler->gtNewHelperCallNode(CORINFO_HELP_CLASSPROFILE, TYP_VOID, args); - GenTree* const tmpNode2 = m_compiler->gtNewLclvNode(tmpNum, TYP_REF); - GenTree* const callCommaNode = - m_compiler->gtNewOperNode(GT_COMMA, TYP_REF, helperCallNode, tmpNode2); - GenTree* const tmpNode3 = m_compiler->gtNewLclvNode(tmpNum, TYP_REF); - GenTree* const asgNode = m_compiler->gtNewOperNode(GT_ASG, TYP_REF, tmpNode3, - call->gtCallThisArg->GetNode()); - GenTree* const asgCommaNode = - m_compiler->gtNewOperNode(GT_COMMA, TYP_REF, asgNode, callCommaNode); - - // Update the call - // - call->gtCallThisArg->SetNode(asgCommaNode); - - JITDUMP("Modified call is now\n"); - DISPTREE(call); - - // Initialize the class table - // - // Hack: we use two high bits of the offset to indicate that this record - // is the start of a class profile, and what kind of call is being profiled. - // - IL_OFFSET offset = jitGetILoffs(call->gtClassProfileCandidateInfo->ilOffset); - assert((offset & (ICorJitInfo::ClassProfile::CLASS_FLAG | - ICorJitInfo::ClassProfile::INTERFACE_FLAG)) == 0); - - offset |= ICorJitInfo::ClassProfile::CLASS_FLAG; - - if (call->IsVirtualStub()) - { - offset |= ICorJitInfo::ClassProfile::INTERFACE_FLAG; - } - else - { - assert(call->IsVirtualVtable()); - } + // Figure out where the table is located. 
+ // + BYTE* classProfile = (*m_schema)[*m_currentSchemaIndex].Offset + m_profileMemory; + *m_currentSchemaIndex += 2; // There are 2 schema entries per class probe - classProfile->ILOffset = offset; - classProfile->Count = 0; + // Grab a temp to hold the 'this' object as it will be used three times + // + unsigned const tmpNum = compiler->lvaGrabTemp(true DEBUGARG("class profile tmp")); + compiler->lvaTable[tmpNum].lvType = TYP_REF; - for (int i = 0; i < ICorJitInfo::ClassProfile::SIZE; i++) - { - classProfile->ClassTable[i] = NO_CLASS_HANDLE; - } - } + // Generate the IR... + // + GenTree* const classProfileNode = + compiler->gtNewIconNode((ssize_t)classProfile, TYP_I_IMPL); + GenTree* const tmpNode = compiler->gtNewLclvNode(tmpNum, TYP_REF); + GenTreeCall::Use* const args = compiler->gtNewCallArgs(tmpNode, classProfileNode); + GenTree* const helperCallNode = + compiler->gtNewHelperCallNode(CORINFO_HELP_CLASSPROFILE, TYP_VOID, args); + GenTree* const tmpNode2 = compiler->gtNewLclvNode(tmpNum, TYP_REF); + GenTree* const callCommaNode = + compiler->gtNewOperNode(GT_COMMA, TYP_REF, helperCallNode, tmpNode2); + GenTree* const tmpNode3 = compiler->gtNewLclvNode(tmpNum, TYP_REF); + GenTree* const asgNode = + compiler->gtNewOperNode(GT_ASG, TYP_REF, tmpNode3, call->gtCallThisArg->GetNode()); + GenTree* const asgCommaNode = + compiler->gtNewOperNode(GT_COMMA, TYP_REF, asgNode, callCommaNode); + + // Update the call + // + call->gtCallThisArg->SetNode(asgCommaNode); - // Restore the stub address on call, whether instrumenting or not. - // - call->gtStubCallStubAddr = call->gtClassProfileCandidateInfo->stubAddr; - } + JITDUMP("Modified call is now\n"); + DISPTREE(call); } - return Compiler::WALK_CONTINUE; + // Restore the stub address on the call, whether instrumenting or not. 
+ // + call->gtStubCallStubAddr = call->gtClassProfileCandidateInfo->stubAddr; } }; // Scan the statements and add class probes // - ClassProbeVisitor visitor(this, (ICorJitInfo::ClassProfile*)profileBlockCountsEnd, instrument); + ClassProbeInserter insertProbes(&schema, profileMemory, &currentSchemaIndex, instrument); + ClassProbeVisitor visitor(this, insertProbes); for (Statement* stmt : block->Statements()) { visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); @@ -639,8 +702,8 @@ void Compiler::fgInstrumentMethod() // Bookkeeping // - assert(visitor.m_count <= countOfCalls); - countOfCalls -= visitor.m_count; + assert(insertProbes.m_count <= countOfCalls); + countOfCalls -= insertProbes.m_count; JITDUMP("\n%d calls remain to be visited\n", countOfCalls); } else @@ -664,16 +727,11 @@ void Compiler::fgInstrumentMethod() if (instrument) { - // Assign the current block's IL offset into the profile data - // (make sure IL offset is sane) - // - IL_OFFSET offset = block->bbCodeOffs; - assert((int)offset >= 0); - - currentBlockCounts->ILOffset = offset; - currentBlockCounts->ExecutionCount = 0; - - size_t addrOfCurrentExecutionCount = (size_t)&currentBlockCounts->ExecutionCount; + assert(block->bbCodeOffs == (IL_OFFSET)schema[currentSchemaIndex].ILOffset); + size_t addrOfCurrentExecutionCount = (size_t)(schema[currentSchemaIndex].Offset + profileMemory); + if (addrOfFirstExecutionCount == 0) + addrOfFirstExecutionCount = addrOfCurrentExecutionCount; + currentSchemaIndex++; // Read Basic-Block count value GenTree* valueNode = @@ -687,9 +745,6 @@ void Compiler::fgInstrumentMethod() GenTree* asgNode = gtNewAssignNode(lhsNode, rhsNode); fgNewStmtAtBeg(block, asgNode); - - // Advance to the next BlockCounts tuple [ILOffset, ExecutionCount] - currentBlockCounts++; } } @@ -702,7 +757,6 @@ void Compiler::fgInstrumentMethod() // noway_assert(countOfBlocks == 0); noway_assert(countOfCalls == 0); - assert(currentBlockCounts == profileBlockCountsEnd); // When prejitting, add the method
entry callback node if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) @@ -733,9 +787,6 @@ void Compiler::fgInstrumentMethod() GenTreeCall::Use* args = gtNewCallArgs(arg); GenTree* call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, args); - // Get the address of the first blocks ExecutionCount - size_t addrOfFirstExecutionCount = (size_t)&profileBlockCountsStart->ExecutionCount; - // Read Basic-Block count value GenTree* valueNode = gtNewIndOfIconHandleNode(TYP_INT, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false); diff --git a/src/coreclr/jit/jitstd/vector.h b/src/coreclr/jit/jitstd/vector.h index 4e603ec0a35a7..9117aa5282d41 100644 --- a/src/coreclr/jit/jitstd/vector.h +++ b/src/coreclr/jit/jitstd/vector.h @@ -221,6 +221,8 @@ class vector size_type size() const; + T* data() { return m_pArray; } + void swap(vector& vec); private: diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs b/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs index 5808db5f534f7..5405efba6c80c 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs @@ -2416,12 +2416,12 @@ static void _reportFatalError(IntPtr thisHandle, IntPtr* ppException, CorJitResu } [UnmanagedCallersOnly] - static HRESULT _allocMethodBlockCounts(IntPtr thisHandle, IntPtr* ppException, uint count, BlockCounts** pBlockCounts) + static HRESULT _getPgoInstrumentationResults(IntPtr thisHandle, IntPtr* ppException, CORINFO_METHOD_STRUCT_* ftnHnd, PgoInstrumentationSchema** pSchema, uint* pCountSchemaItems, byte** pInstrumentationData) { var _this = GetThis(thisHandle); try { - return _this.allocMethodBlockCounts(count, ref *pBlockCounts); + return _this.getPgoInstrumentationResults(ftnHnd, ref *pSchema, ref *pCountSchemaItems, pInstrumentationData); } catch (Exception ex) { @@ -2431,12 +2431,12 @@ static HRESULT _allocMethodBlockCounts(IntPtr thisHandle, IntPtr* ppException, u } [UnmanagedCallersOnly] - static HRESULT 
_getMethodBlockCounts(IntPtr thisHandle, IntPtr* ppException, CORINFO_METHOD_STRUCT_* ftnHnd, uint* pCount, BlockCounts** pBlockCounts, uint* pNumRuns) + static HRESULT _allocPgoInstrumentationBySchema(IntPtr thisHandle, IntPtr* ppException, CORINFO_METHOD_STRUCT_* ftnHnd, PgoInstrumentationSchema* pSchema, uint countSchemaItems, byte** pInstrumentationData) { var _this = GetThis(thisHandle); try { - return _this.getMethodBlockCounts(ftnHnd, ref *pCount, ref *pBlockCounts, ref *pNumRuns); + return _this.allocPgoInstrumentationBySchema(ftnHnd, pSchema, countSchemaItems, pInstrumentationData); } catch (Exception ex) { @@ -2701,8 +2701,8 @@ static IntPtr GetUnmanagedCallbacks() callbacks[160] = (delegate* unmanaged)&_logMsg; callbacks[161] = (delegate* unmanaged)&_doAssert; callbacks[162] = (delegate* unmanaged)&_reportFatalError; - callbacks[163] = (delegate* unmanaged)&_allocMethodBlockCounts; - callbacks[164] = (delegate* unmanaged)&_getMethodBlockCounts; + callbacks[163] = (delegate* unmanaged)&_getPgoInstrumentationResults; + callbacks[164] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; callbacks[165] = (delegate* unmanaged)&_getLikelyClass; callbacks[166] = (delegate* unmanaged)&_recordCallSite; callbacks[167] = (delegate* unmanaged)&_recordRelocation; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 1aadd4e266454..511199c8edc66 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -296,6 +296,44 @@ public unsafe struct CORINFO_RESOLVED_TOKEN public uint cbMethodSpec; } + [StructLayout(LayoutKind.Sequential)] + public struct PgoInstrumentationSchema + { + public IntPtr Offset; + public PgoInstrumentationKind InstrumentationKind; + public int ILOffset; + public int Count; + public int Other; + } + + public enum PgoInstrumentationKind + { + // Schema data types + None = 0, + FourByte = 1, + EightByte 
= 2, + TypeHandle = 3, + + // Mask of all schema data types + MarshalMask = 0xF, + + // ExcessAlignment + Align4Byte = 0x10, + Align8Byte = 0x20, + AlignPointer = 0x30, + + // Mask of all schema data types + AlignMask = 0x30, + + DescriptorMin = 0x40, + + Done = None, // All instrumentation schemas must end with a record which is "Done" + BasicBlockIntCount = DescriptorMin | FourByte, // 4 byte basic block counter, using unsigned 4 byte int + TypeHandleHistogramCount = (DescriptorMin * 1) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram + TypeHandleHistogramTypeHandle = (DescriptorMin * 1) | TypeHandle, // TypeHandle that is part of a type histogram + Version = (DescriptorMin * 2) | None, // Version is encoded in the Other field of the schema + NumRuns = (DescriptorMin * 3) | None, // Number of runs is encoded in the Other field of the schema + } // Flags computed by a runtime compiler public enum CorInfoMethodRuntimeFlags diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt index 46a18235a7cc9..1b6b4e8f259ca 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt @@ -57,6 +57,7 @@ const char *,byte* mdMethodDef,mdToken,unsigned int mdToken,,unsigned int BYTE*,byte*,unsigned char* +BYTE**,byte**,unsigned char** GSCookie*,IntPtr*,void* GSCookie**,IntPtr**,void** @@ -108,6 +109,8 @@ unsigned int*,ref uint CORINFO_JUST_MY_CODE_HANDLE**,ref CORINFO_JUST_MY_CODE_HANDLE_*,void** ICorJitInfo::BlockCounts**,ref BlockCounts*,void** +PgoInstrumentationSchema**,ref PgoInstrumentationSchema*,void** +PgoInstrumentationSchema*,PgoInstrumentationSchema*,void* ; Enums CorInfoClassId,,int @@ -321,8 +324,8 @@ FUNCTIONS bool logMsg(unsigned level, const char* fmt, va_list args) int doAssert(const char* szFile, int iLine, const char* szExpr) void 
reportFatalError(CorJitResult result) - HRESULT allocMethodBlockCounts(UINT32 count, ICorJitInfo::BlockCounts** pBlockCounts) - HRESULT getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, UINT32* pCount, ICorJitInfo::BlockCounts** pBlockCounts, UINT32* pNumRuns) + HRESULT getPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, PgoInstrumentationSchema** pSchema, UINT32* pCountSchemaItems, BYTE**pInstrumentationData) + HRESULT allocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData) CORINFO_CLASS_HANDLE getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses) void recordCallSite(ULONG instrOffset, CORINFO_SIG_INFO* callSig, CORINFO_METHOD_HANDLE methodHandle) void recordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta) diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 355514a4f41a9..4cd0a16083ba6 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -2304,14 +2304,14 @@ partial void findKnownBBCountBlock(ref BlockType blockType, void* location, ref blockType = BlockType.Unknown; } - private HRESULT allocMethodBlockCounts(uint count, ref BlockCounts* pBlockCounts) + private unsafe HRESULT allocPgoInstrumentationBySchema(CORINFO_METHOD_STRUCT_* ftnHnd, PgoInstrumentationSchema* pSchema, uint countSchemaItems, byte** pInstrumentationData) { CORJIT_FLAGS flags = default(CORJIT_FLAGS); getJitFlags(ref flags, 0); + *pInstrumentationData = null; if (flags.IsSet(CorJitFlag.CORJIT_FLAG_IL_STUB)) { - pBlockCounts = null; return HRESULT.E_NOTIMPL; } @@ -2319,17 +2319,40 @@ private HRESULT 
allocMethodBlockCounts(uint count, ref BlockCounts* pBlockCounts EcmaMethod ecmaMethod = _methodCodeNode.Method.GetTypicalMethodDefinition() as EcmaMethod; if (ecmaMethod == null) { - pBlockCounts = null; + return HRESULT.E_NOTIMPL; + } + + // Only allocation of PGO data for the current method is supported. + if (_methodCodeNode.Method != HandleToObject(ftnHnd)) + { return HRESULT.E_NOTIMPL; } if (!_compilation.IsModuleInstrumented(ecmaMethod.Module)) { - pBlockCounts = null; return HRESULT.E_NOTIMPL; } - pBlockCounts = (BlockCounts*)GetPin(_bbCounts = new byte[count * sizeof(BlockCounts)]); + // Validate that each schema item is only used for a basic block count + for (uint iSchema = 0; iSchema < countSchemaItems; iSchema++) + { + if (pSchema[iSchema].InstrumentationKind != PgoInstrumentationKind.BasicBlockIntCount) + return HRESULT.E_NOTIMPL; + if (pSchema[iSchema].Count != 1) + return HRESULT.E_NOTIMPL; + } + + BlockCounts* blockCounts = (BlockCounts*)GetPin(_bbCounts = new byte[countSchemaItems * sizeof(BlockCounts)]); + *pInstrumentationData = (byte*)blockCounts; + + for (uint iSchema = 0; iSchema < countSchemaItems; iSchema++) + { + // Update schema have correct offsets + pSchema[iSchema].Offset = new IntPtr((byte*)&blockCounts[iSchema].ExecutionCount - (byte*)blockCounts); + // Insert IL Offsets into block data to match schema + blockCounts[iSchema].ILOffset = (uint)pSchema[iSchema].ILOffset; + } + if (_profileDataNode == null) { _profileDataNode = _compilation.NodeFactory.ProfileData(_methodCodeNode); @@ -2337,8 +2360,8 @@ private HRESULT allocMethodBlockCounts(uint count, ref BlockCounts* pBlockCounts return 0; } - private HRESULT getMethodBlockCounts(CORINFO_METHOD_STRUCT_* ftnHnd, ref uint pCount, ref BlockCounts* pBlockCounts, ref uint pNumRuns) - { throw new NotImplementedException("getBBProfileData"); } + private HRESULT getPgoInstrumentationResults(CORINFO_METHOD_STRUCT_* ftnHnd, ref PgoInstrumentationSchema* pSchema, ref uint pCountSchemaItems, 
byte** pInstrumentationData) + { throw new NotImplementedException("getPgoInstrumentationResults"); } private CORINFO_CLASS_STRUCT_* getLikelyClass(CORINFO_METHOD_STRUCT_* ftnHnd, CORINFO_CLASS_STRUCT_* baseHnd, uint IlOffset, ref uint pLikelihood, ref uint pNumberOfClasses) { diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface.h b/src/coreclr/tools/aot/jitinterface/jitinterface.h index 1d94c382277e5..8d6df6f35572b 100644 --- a/src/coreclr/tools/aot/jitinterface/jitinterface.h +++ b/src/coreclr/tools/aot/jitinterface/jitinterface.h @@ -173,8 +173,8 @@ struct JitInterfaceCallbacks bool (* logMsg)(void * thisHandle, CorInfoExceptionClass** ppException, unsigned level, const char* fmt, va_list args); int (* doAssert)(void * thisHandle, CorInfoExceptionClass** ppException, const char* szFile, int iLine, const char* szExpr); void (* reportFatalError)(void * thisHandle, CorInfoExceptionClass** ppException, int result); - int (* allocMethodBlockCounts)(void * thisHandle, CorInfoExceptionClass** ppException, unsigned int count, void** pBlockCounts); - int (* getMethodBlockCounts)(void * thisHandle, CorInfoExceptionClass** ppException, void* ftnHnd, unsigned int* pCount, void** pBlockCounts, unsigned int* pNumRuns); + int (* getPgoInstrumentationResults)(void * thisHandle, CorInfoExceptionClass** ppException, void* ftnHnd, void** pSchema, unsigned int* pCountSchemaItems, unsigned char** pInstrumentationData); + int (* allocPgoInstrumentationBySchema)(void * thisHandle, CorInfoExceptionClass** ppException, void* ftnHnd, void* pSchema, unsigned int countSchemaItems, unsigned char** pInstrumentationData); void* (* getLikelyClass)(void * thisHandle, CorInfoExceptionClass** ppException, void* ftnHnd, void* baseHnd, unsigned int ilOffset, unsigned int* pLikelihood, unsigned int* pNumberOfClasses); void (* recordCallSite)(void * thisHandle, CorInfoExceptionClass** ppException, unsigned int instrOffset, void* callSig, void* methodHandle); void (* recordRelocation)(void * 
thisHandle, CorInfoExceptionClass** ppException, void* location, void* target, unsigned short fRelocType, unsigned short slotNum, int addlDelta); @@ -1769,24 +1769,26 @@ class JitInterfaceWrapper if (pException != nullptr) throw pException; } - virtual int allocMethodBlockCounts( - unsigned int count, - void** pBlockCounts) + virtual int getPgoInstrumentationResults( + void* ftnHnd, + void** pSchema, + unsigned int* pCountSchemaItems, + unsigned char** pInstrumentationData) { CorInfoExceptionClass* pException = nullptr; - int temp = _callbacks->allocMethodBlockCounts(_thisHandle, &pException, count, pBlockCounts); + int temp = _callbacks->getPgoInstrumentationResults(_thisHandle, &pException, ftnHnd, pSchema, pCountSchemaItems, pInstrumentationData); if (pException != nullptr) throw pException; return temp; } - virtual int getMethodBlockCounts( + virtual int allocPgoInstrumentationBySchema( void* ftnHnd, - unsigned int* pCount, - void** pBlockCounts, - unsigned int* pNumRuns) + void* pSchema, + unsigned int countSchemaItems, + unsigned char** pInstrumentationData) { CorInfoExceptionClass* pException = nullptr; - int temp = _callbacks->getMethodBlockCounts(_thisHandle, &pException, ftnHnd, pCount, pBlockCounts, pNumRuns); + int temp = _callbacks->allocPgoInstrumentationBySchema(_thisHandle, &pException, ftnHnd, pSchema, countSchemaItems, pInstrumentationData); if (pException != nullptr) throw pException; return temp; } diff --git a/src/coreclr/vm/ClrEtwAll.man b/src/coreclr/vm/ClrEtwAll.man index cf3666a9abc6f..f27bf4feb2f9f 100644 --- a/src/coreclr/vm/ClrEtwAll.man +++ b/src/coreclr/vm/ClrEtwAll.man @@ -85,6 +85,8 @@ message="$(string.RuntimePublisher.MethodDiagnosticKeywordMessage)" symbol="CLR_METHODDIAGNOSTIC_KEYWORD" /> + @@ -412,8 +414,15 @@ - - + + + + + + + @@ -2831,6 +2840,42 @@ + + + + @@ -3845,6 +3890,16 @@ keywords ="AssemblyLoaderKeyword" opcode="KnownPathProbed" task="AssemblyLoader" symbol="KnownPathProbed" 
message="$(string.RuntimePublisher.KnownPathProbedEventMessage)"/> + + + + @@ -7076,6 +7131,7 @@ + @@ -7272,6 +7328,7 @@ + @@ -7599,6 +7656,7 @@ + @@ -7738,6 +7796,8 @@ + + diff --git a/src/coreclr/vm/dynamicmethod.h b/src/coreclr/vm/dynamicmethod.h index 0d6599f880bba..c287e31456d3f 100644 --- a/src/coreclr/vm/dynamicmethod.h +++ b/src/coreclr/vm/dynamicmethod.h @@ -68,6 +68,11 @@ class DynamicResolver unsigned * pEHSize) = 0; virtual SigPointer GetLocalSig() = 0; +#ifdef FEATURE_PGO + virtual PgoManager* volatile* GetDynamicPgoManagerPointer() { return NULL; } + PgoManager* GetDynamicPgoManager() { return NULL; } +#endif + // // jit interface api virtual OBJECTHANDLE ConstructStringLiteral(mdToken metaTok) = 0; @@ -136,6 +141,10 @@ class LCGMethodResolver : public DynamicResolver STRINGREF GetStringLiteral(mdToken token); STRINGREF * GetOrInternString(STRINGREF *pString); void AddToUsedIndCellList(BYTE * indcell); +#ifdef FEATURE_PGO + PgoManager* volatile* GetDynamicPgoManagerPointer() { return &m_pgoManager; } + PgoManager* GetDynamicPgoManager() { return m_pgoManager; } +#endif private: void RecycleIndCells(); @@ -163,6 +172,10 @@ class LCGMethodResolver : public DynamicResolver DynamicStringLiteral* m_DynamicStringLiterals; IndCellList * m_UsedIndCellList; // list to keep track of all the indirection cells used by the jitted code ExecutionManager::JumpStubCache * m_pJumpStubCache; + +#ifdef FEATURE_PGO + Volatile m_pgoManager; +#endif // FEATURE_PGO }; // class LCGMethodResolver //--------------------------------------------------------------------------------------- diff --git a/src/coreclr/vm/eventtrace.cpp b/src/coreclr/vm/eventtrace.cpp index 14da101cd8c6d..2dfd8a0b47dde 100644 --- a/src/coreclr/vm/eventtrace.cpp +++ b/src/coreclr/vm/eventtrace.cpp @@ -5395,6 +5395,70 @@ VOID ETW::MethodLog::GetR2RGetEntryPointStart(MethodDesc *pMethodDesc) } } +VOID ETW::MethodLog::LogMethodInstrumentationData(MethodDesc* method, uint32_t cbData, BYTE *data) +{ + 
CONTRACTL{ + NOTHROW; + GC_TRIGGERS; + } CONTRACTL_END; + const uint32_t chunkSize = 40000; + const uint32_t maxDataSize = chunkSize * 0x1000; + const uint32_t FinalChunkFlag = 0x80000000; + + if (ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_DOTNET_Context, JitInstrumentationDataVerbose)) + { + EX_TRY + { + SendMethodDetailsEvent(method); + ULONG ulMethodToken=0; + auto pModule = method->GetModule_NoLogging(); + bool bIsDynamicMethod = method->IsDynamicMethod(); + BOOL bIsGenericMethod = FALSE; + if(method->GetMethodTable_NoLogging()) + bIsGenericMethod = method->HasClassOrMethodInstantiation_NoLogging(); + + // Use MethodDesc if Dynamic or Generic methods + if( bIsDynamicMethod || bIsGenericMethod) + { + if(bIsGenericMethod) + ulMethodToken = (ULONG)method->GetMemberDef_NoLogging(); + if(bIsDynamicMethod) // if its a generic and a dynamic method, we would set the methodtoken to 0 + ulMethodToken = (ULONG)0; + } + else + ulMethodToken = (ULONG)method->GetMemberDef_NoLogging(); + + SString tNamespace, tMethodName, tMethodSignature; + method->GetMethodInfo(tNamespace, tMethodName, tMethodSignature); + + PCWSTR pNamespace = (PCWSTR)tNamespace.GetUnicode(); + PCWSTR pMethodName = (PCWSTR)tMethodName.GetUnicode(); + PCWSTR pMethodSignature = (PCWSTR)tMethodSignature.GetUnicode(); + + // Send data in 40,000 byte chunks + uint32_t chunkIndex = 0; + for (; cbData > 0; chunkIndex++) + { + bool finalChunk = cbData <= chunkSize; + uint32_t chunkSizeToEmit = finalChunk ? cbData : chunkSize; + + FireEtwJitInstrumentationDataVerbose( + GetClrInstanceId(), + chunkIndex | (finalChunk ? 
FinalChunkFlag : 0), + chunkSizeToEmit, + (ULONGLONG)(TADDR) method, + (ULONGLONG)(TADDR) pModule, + ulMethodToken, + pNamespace, + pMethodName, + pMethodSignature, + (BYTE*)data); + data += chunkSizeToEmit; + cbData -= chunkSizeToEmit; + } + } EX_CATCH{ } EX_END_CATCH(SwallowAllExceptions); + } +} /*******************************************************/ /* This is called by the runtime when a method is jitted completely */ diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index aa60a55ceb3e2..7e63ef114bc89 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11948,10 +11948,12 @@ void* CEEJitInfo::getMethodSync(CORINFO_METHOD_HANDLE ftnHnd, } /*********************************************************************/ -HRESULT CEEJitInfo::allocMethodBlockCounts ( - UINT32 count, // count of tuples - ICorJitInfo::BlockCounts ** pBlockCounts // pointer to array of tuples - ) +HRESULT CEEJitInfo::allocPgoInstrumentationBySchema( + CORINFO_METHOD_HANDLE ftnHnd, /* IN */ + PgoInstrumentationSchema* pSchema, /* IN/OUT */ + UINT32 countSchemaItems, /* IN */ + BYTE** pInstrumentationData /* OUT */ + ) { CONTRACTL { THROWS; @@ -11985,7 +11987,7 @@ HRESULT CEEJitInfo::allocMethodBlockCounts ( hr = (*pBlockCounts != nullptr) ? S_OK : E_OUTOFMEMORY; #else // FEATURE_PREJIT #ifdef FEATURE_PGO - hr = PgoManager::allocMethodBlockCounts(m_pMethodBeingCompiled, count, pBlockCounts, codeSize); + hr = PgoManager::allocPgoInstrumentationBySchema(m_pMethodBeingCompiled, pSchema, countSchemaItems, pInstrumentationData); #else _ASSERTE(!"allocMethodBlockCounts not implemented on CEEJitInfo!"); hr = E_NOTIMPL; @@ -11999,12 +12001,13 @@ HRESULT CEEJitInfo::allocMethodBlockCounts ( // Consider implementing getBBProfileData on CEEJitInfo. This will allow us // to use profile info in codegen for non zapped images. 
-HRESULT CEEJitInfo::getMethodBlockCounts ( - CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, // pointer to the count of tuples - ICorJitInfo::BlockCounts ** pBlockCounts, // pointer to array of tuples - UINT32 * pNumRuns - ) + +HRESULT CEEJitInfo::getPgoInstrumentationResults( + CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema **pSchema, // pointer to the schema table which describes the instrumentation results (pointer will not remain valid after jit completes) + UINT32 * pCountSchemaItems, // pointer to the count schema items + BYTE ** pInstrumentationData // pointer to the actual instrumentation data (pointer will not remain valid after jit completes) + ) { CONTRACTL { THROWS; @@ -12013,33 +12016,47 @@ HRESULT CEEJitInfo::getMethodBlockCounts ( } CONTRACTL_END; HRESULT hr = E_FAIL; - *pCount = 0; - *pBlockCounts = NULL; - *pNumRuns = 0; + *pCountSchemaItems = 0; + *pInstrumentationData = NULL; JIT_TO_EE_TRANSITION(); #ifdef FEATURE_PGO MethodDesc* pMD = (MethodDesc*)ftnHnd; - unsigned codeSize = 0; - if (pMD->IsDynamicMethod()) + ComputedPgoData* pDataCur = m_foundPgoData; + + // Search linked list of previously found pgo information + for (; pDataCur != nullptr; pDataCur = pDataCur->m_next) { - unsigned stackSize, ehSize; - CorInfoOptions options; - DynamicResolver * pResolver = m_pMethodBeingCompiled->AsDynamicMethodDesc()->GetResolver(); - pResolver->GetCodeInfo(&codeSize, &stackSize, &options, &ehSize); + if (pDataCur->m_pMD == pMD) + { + *pSchema = pDataCur->m_schema.GetElements(); + *pCountSchemaItems = pDataCur->m_schema.GetCount(); + *pInstrumentationData = pDataCur->m_pInstrumentationData; + hr = pDataCur->m_hr; + break; + } } - else if (pMD->HasILHeader()) + + if (pDataCur == nullptr) { - COR_ILMETHOD_DECODER decoder(pMD->GetILHeader()); - codeSize = decoder.GetCodeSize(); + // If not found in previous list, gather it here, and add to linked list + NewHolder newPgoData = new ComputedPgoData(pMD); + newPgoData->m_next = m_foundPgoData; + 
m_foundPgoData = newPgoData; + newPgoData.SuppressRelease(); + + newPgoData->m_hr = PgoManager::getPgoInstrumentationResults(pMD, &newPgoData->m_schema, &newPgoData->m_pInstrumentationData); + pDataCur = m_foundPgoData; } - hr = PgoManager::getMethodBlockCounts(pMD, codeSize, pCount, pBlockCounts, pNumRuns); - + *pSchema = pDataCur->m_schema.GetElements(); + *pCountSchemaItems = pDataCur->m_schema.GetCount(); + *pInstrumentationData = pDataCur->m_pInstrumentationData; + hr = pDataCur->m_hr; #else - _ASSERTE(!"getMethodBlockCounts not implemented on CEEJitInfo!"); + _ASSERTE(!"getPgoInstrumentationResults not implemented on CEEJitInfo!"); hr = E_NOTIMPL; #endif @@ -14299,21 +14316,24 @@ void* CEEInfo::getMethodSync(CORINFO_METHOD_HANDLE ftnHnd, UNREACHABLE(); // only called on derived class. } -HRESULT CEEInfo::allocMethodBlockCounts ( - UINT32 count, // the count of tuples - BlockCounts ** pBlockCounts // pointer to array of tuples - ) +HRESULT CEEInfo::allocPgoInstrumentationBySchema( + CORINFO_METHOD_HANDLE ftnHnd, /* IN */ + PgoInstrumentationSchema* pSchema, /* IN/OUT */ + UINT32 countSchemaItems, /* IN */ + BYTE** pInstrumentationData /* OUT */ + ) { LIMITED_METHOD_CONTRACT; UNREACHABLE_RET(); // only called on derived class. } -HRESULT CEEInfo::getMethodBlockCounts( - CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, // pointer to the count of tuples - BlockCounts ** pBlockCounts, // pointer to array of tuples - UINT32 * pNumRuns - ) + +HRESULT CEEInfo::getPgoInstrumentationResults( + CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema **pSchema, // pointer to the schema table which describes the instrumentation results (pointer will not remain valid after jit completes) + UINT32 * pCountSchemaItems, // pointer to the count schema items + BYTE ** pInstrumentationData // pointer to the actual instrumentation data (pointer will not remain valid after jit completes) + ) { LIMITED_METHOD_CONTRACT; UNREACHABLE_RET(); // only called on derived class. 
diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 8b681c4818e57..07a035a278611 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -20,7 +20,7 @@ #else // !TARGET_UNIX #define MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT ((GetOsPageSize() / 2) - 1) #endif // !TARGET_UNIX - +#include "pgo.h" enum StompWriteBarrierCompletionAction { @@ -669,18 +669,19 @@ class CEEJitInfo : public CEEInfo CORINFO_EH_CLAUSE* clause /* OUT */ ) override final; + HRESULT allocPgoInstrumentationBySchema( + CORINFO_METHOD_HANDLE ftnHnd, /* IN */ + PgoInstrumentationSchema* pSchema, /* IN/OUT */ + UINT32 countSchemaItems, /* IN */ + BYTE** pInstrumentationData /* OUT */ + ) override final; - HRESULT allocMethodBlockCounts ( - UINT32 count, // the count of tuples - ICorJitInfo::BlockCounts ** pBlockCounts // pointer to array of tuples - ) override final; - - HRESULT getMethodBlockCounts( - CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, // pointer to the count of tuples - BlockCounts ** pBlockCounts, // pointer to array of tuples - UINT32 * pNumRuns - ) override final; + HRESULT getPgoInstrumentationResults( + CORINFO_METHOD_HANDLE ftnHnd, /* IN */ + PgoInstrumentationSchema** pSchema, /* OUT */ + UINT32* pCountSchemaItems, /* OUT */ + BYTE**pInstrumentationData /* OUT */ + ) override final; CORINFO_CLASS_HANDLE getLikelyClass( CORINFO_METHOD_HANDLE ftnHnd, @@ -884,7 +885,18 @@ class CEEJitInfo : public CEEInfo if (m_pPatchpointInfoFromJit != NULL) delete [] ((BYTE*) m_pPatchpointInfoFromJit); #endif - +#ifdef FEATURE_PGO + if (m_foundPgoData != NULL) + { + ComputedPgoData* current = m_foundPgoData; + while (current != NULL) + { + ComputedPgoData* next = current->m_next; + delete current; + current = next; + } + } +#endif } // ICorDebugInfo stuff. 
@@ -924,6 +936,23 @@ class CEEJitInfo : public CEEInfo PatchpointInfo* getOSRInfo(unsigned* ilOffset) override final; protected : + +#ifdef FEATURE_PGO + // PGO data + struct ComputedPgoData + { + ComputedPgoData(MethodDesc* pMD) : m_pMD(pMD) {} + + ComputedPgoData* m_next = nullptr; + MethodDesc *m_pMD; + SArray m_schema; + BYTE *m_pInstrumentationData = nullptr; + HRESULT m_hr = E_NOTIMPL; + }; + ComputedPgoData* m_foundPgoData = nullptr; +#endif + + EEJitManager* m_jitManager; // responsible for allocating memory CodeHeader* m_CodeHeader; // descriptor for JITTED code COR_ILMETHOD_DECODER * m_ILHeader; // the code header as exist in the file diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp index 4215b8c2e7c96..4f222be4a2c03 100644 --- a/src/coreclr/vm/loaderallocator.cpp +++ b/src/coreclr/vm/loaderallocator.cpp @@ -83,6 +83,10 @@ LoaderAllocator::LoaderAllocator() m_pUMEntryThunkCache = NULL; m_nLoaderAllocator = InterlockedIncrement64((LONGLONG *)&LoaderAllocator::cLoaderAllocatorsCreated); + +#ifdef FEATURE_PGO + m_pgoManager = NULL; +#endif } LoaderAllocator::~LoaderAllocator() diff --git a/src/coreclr/vm/loaderallocator.hpp b/src/coreclr/vm/loaderallocator.hpp index 6028496573988..55abbd74c2f3c 100644 --- a/src/coreclr/vm/loaderallocator.hpp +++ b/src/coreclr/vm/loaderallocator.hpp @@ -24,6 +24,7 @@ class FuncPtrStubs; #include "crossloaderallocatorhash.h" #include "onstackreplacement.h" #include "lockedrangelist.h" +#include "pgo.h" #define VPTRU_LoaderAllocator 0x3200 @@ -192,6 +193,11 @@ class LoaderAllocator // IL stub cache with fabricated MethodTable parented by a random module in this LoaderAllocator. 
ILStubCache m_ILStubCache; +#ifdef FEATURE_PGO + // PgoManager to hold pgo data associated with this LoaderAllocator + Volatile m_pgoManager; +#endif // FEATURE_PGO + public: BYTE *GetVSDHeapInitialBlock(DWORD *pSize); BYTE *GetCodeHeapInitialBlock(const BYTE * loAddr, const BYTE * hiAddr, DWORD minimumSize, DWORD *pSize); @@ -480,6 +486,36 @@ class LoaderAllocator virtual void CleanupHandles() { } void RegisterFailedTypeInitForCleanup(ListLockEntry *pListLockEntry); + +#ifdef FEATURE_PGO + PgoManager *GetPgoManager() + { + return m_pgoManager; + } + + PgoManager *GetOrCreatePgoManager() + { + auto currentValue = GetPgoManager(); + if (currentValue != NULL) + { + return currentValue; + } + PgoManager::CreatePgoManager(&m_pgoManager, true); + return GetPgoManager(); + } +#endif // FEATURE_PGO +#else +#ifdef FEATURE_PGO + PgoManager *GetPgoManager() + { + return NULL; + } + + PgoManager *GetOrCreatePgoManager() + { + return NULL; + } +#endif // FEATURE_PGO #endif // !defined(DACCESS_COMPILE) diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index 1535a73a04867..5408e2029ed44 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -29,6 +29,8 @@ #include "prettyprintsig.h" #include "formattype.h" #include "fieldmarshaler.h" +#include "versionresilienthashcode.h" +#include "typehashingalgorithms.h" #ifdef FEATURE_PREJIT #include "compile.h" @@ -650,15 +652,11 @@ PTR_MethodDesc MethodDesc::GetDeclMethodDesc(UINT32 slotNumber) //******************************************************************************* // Returns a hash for the method. // The hash will be the same for the method across multiple process runs. 
+#ifndef DACCESS_COMPILE COUNT_T MethodDesc::GetStableHash() { WRAPPER_NO_CONTRACT; - _ASSERTE(IsRestored_NoLogging()); - DefineFullyQualifiedNameForClass(); - - const char * moduleName = GetModule()->GetSimpleName(); - const char * className; - const char * methodName = GetName(); + const char * className = NULL; if (IsLCGMethod()) { @@ -668,62 +666,18 @@ COUNT_T MethodDesc::GetStableHash() { className = ILStubResolver::GetStubClassName(this); } - else - { -#if defined(_DEBUG) - // Calling _GetFullyQualifiedNameForClass in chk build is very expensive - // since it construct the class name everytime we call this method. In chk - // builds we already have a cheaper way to get the class name - - // GetDebugClassName - which doesn't calculate the class name everytime. - // This results in huge saving in Ngen time for checked builds. - className = m_pszDebugClassName; -#else // !_DEBUG - // since this is for diagnostic purposes only, - // give up on the namespace, as we don't have a buffer to concat it - // also note this won't show array class names. 
- LPCUTF8 nameSpace; - MethodTable * pMT = GetMethodTable(); - - className = pMT->GetFullyQualifiedNameInfo(&nameSpace); -#endif // !_DEBUG - } - - COUNT_T hash = HashStringA(moduleName); // Start the hash with the Module name - hash = HashCOUNT_T(hash, HashStringA(className)); // Hash in the name of the Class name - hash = HashCOUNT_T(hash, HashStringA(methodName)); // Hash in the name of the Method name - - // Handle Generic Types and Generic Methods - // - if (HasClassInstantiation() && !GetMethodTable()->IsGenericTypeDefinition()) + + if (className == NULL) { - Instantiation classInst = GetClassInstantiation(); - for (DWORD i = 0; i < classInst.GetNumArgs(); i++) - { - MethodTable * pMT = classInst[i].GetMethodTable(); - // pMT can be NULL for TypeVarTypeDesc - // @TODO: Implement TypeHandle::GetStableHash instead of - // checking pMT==NULL - if (pMT) - hash = HashCOUNT_T(hash, HashStringA(GetFullyQualifiedNameForClass(pMT))); - } + return GetVersionResilientMethodHashCode(this); } - - if (HasMethodInstantiation() && !IsGenericMethodDefinition()) + else { - Instantiation methodInst = GetMethodInstantiation(); - for (DWORD i = 0; i < methodInst.GetNumArgs(); i++) - { - MethodTable * pMT = methodInst[i].GetMethodTable(); - // pMT can be NULL for TypeVarTypeDesc - // @TODO: Implement TypeHandle::GetStableHash instead of - // checking pMT==NULL - if (pMT) - hash = HashCOUNT_T(hash, HashStringA(GetFullyQualifiedNameForClass(pMT))); - } + int typeHash = ComputeNameHashCode("", className); + return typeHash ^ ComputeNameHashCode(GetName()); } - - return hash; } +#endif // DACCESS_COMPILE //******************************************************************************* // Get the number of type parameters to a generic method diff --git a/src/coreclr/vm/pgo.cpp b/src/coreclr/vm/pgo.cpp index 3b7ab178984ef..8e56f8ea7a65c 100644 --- a/src/coreclr/vm/pgo.cpp +++ b/src/coreclr/vm/pgo.cpp @@ -4,17 +4,51 @@ #include "common.h" #include "log.h" #include "pgo.h" +#include 
"versionresilienthashcode.h" +#include "typestring.h" +#include "pgo_formatprocessing.h" #ifdef FEATURE_PGO -ICorJitInfo::BlockCounts* PgoManager::s_PgoData; -unsigned volatile PgoManager::s_PgoIndex; +// Data structure for holding pgo data +// Need to be walkable at process shutdown without taking meaningful locks +// Need to have an associated MethodDesc for emission +// +// Need to support lookup by Exact method, and at the non-generic level as well +// In addition, lookup by some form of stable hash would be really nice for both R2R multi-module scenarios +// as well as the existing text format approach +// +// In the current implementation, the method stable hash code isn't a good replacement for "token" as it doesn't +// carry any detail about signatures, and is probably quite slow to compute +// The plan is to swap over to the typenamehash + +// Goals +// 1. Need to be able to walk at any time. +// 2. Need to be able to lookup by MethodDesc +// 3. Need to be able to lookup by Hash! + +// Solution: + +// Lookup patterns for use by JIT +// 1. For Current Runtime generated lookups, there is a SHash in each LoaderAllocator, using the MethodDesc as +// key for non-dynamic methods, and a field in the DynamicMethodDesc for the dynamic methods. +// 2. For R2R lookups, lookup via IL token exact match, as well as a hash based lookup. +// 3. For text based lookups, lookup by hash (only enabled if the ReadPGOData COMPlus is set). + +// For emission into output, we will use an approach that relies on walking linked lists +// 1. InstrumentationDataHeader shall be placed before any instrumentation data. It will be part of a linked +// list of instrumentation data that has the same lifetime. +// 2. InstrumentationDataWithEqualLifetimeHeader shall be part of a doubly linked list. This list shall be protected +// by a lock, and serves to point at the various singly linked lists of InstrumentationData. 
+ const char* const PgoManager::s_FileHeaderString = "*** START PGO Data, max index = %u ***\n"; const char* const PgoManager::s_FileTrailerString = "*** END PGO Data ***\n"; -const char* const PgoManager::s_MethodHeaderString = "@@@ token 0x%08X hash 0x%08X ilSize 0x%08X records 0x%08X index %u\n"; -const char* const PgoManager::s_RecordString = "ilOffs %u count %u\n"; -const char* const PgoManager::s_ClassProfileHeader = "classProfile iloffs %u samples %u entries %u totalCount %u %s\n"; -const char* const PgoManager::s_ClassProfileEntry = "class %p (%s) count %u\n"; +const char* const PgoManager::s_MethodHeaderString = "@@@ codehash 0x%08X methodhash 0x%08X ilSize 0x%08X records 0x%08X\n"; +const char* const PgoManager::s_RecordString = "Schema InstrumentationKind %u ILOffset %u Count %u Other %u\n"; +const char* const PgoManager::s_None = "None\n"; +const char* const PgoManager::s_FourByte = "%u\n"; +const char* const PgoManager::s_EightByte = "%u %u\n"; +const char* const PgoManager::s_TypeHandle = "TypeHandle: %s\n"; // Data item in class profile histogram // @@ -30,36 +64,48 @@ struct HistogramEntry // struct Histogram { - Histogram(const ICorJitInfo::ClassProfile* classProfile); + Histogram(uint32_t histogramCount, INT_PTR* histogramEntries, unsigned entryCount); - // Number of nonzero entries in the histogram - unsigned m_count; - // Sum of counts from all entries in the histogram + // Sum of counts from all entries in the histogram. This includes "unknown" entries which are not captured in m_histogram unsigned m_totalCount; + // Rough guess at count of unknown types + unsigned m_unknownTypes; // Histogram entries, in no particular order. // The first m_count of these will be valid. 
- HistogramEntry m_histogram[ICorJitInfo::ClassProfile::SIZE]; + StackSArray m_histogram; }; -Histogram::Histogram(const ICorJitInfo::ClassProfile* classProfile) +Histogram::Histogram(uint32_t histogramCount, INT_PTR* histogramEntries, unsigned entryCount) { - m_count = 0; + m_unknownTypes = 0; m_totalCount = 0; + uint32_t unknownTypeHandleMask = 0; - for (unsigned k = 0; k < ICorJitInfo::ClassProfile::SIZE; k++) + for (unsigned k = 0; k < entryCount; k++) { - CORINFO_CLASS_HANDLE currentEntry = classProfile->ClassTable[k]; - - if (currentEntry == NULL) + + if (histogramEntries[k] == 0) { continue; } m_totalCount++; + + if (IsUnknownTypeHandle(histogramEntries[k])) + { + if (AddTypeHandleToUnknownTypeHandleMask(histogramEntries[k], &unknownTypeHandleMask)) + { + m_unknownTypes++; + } + // An unknown type handle will adjust total count but not set of entries + continue; + } + + CORINFO_CLASS_HANDLE currentEntry = (CORINFO_CLASS_HANDLE)histogramEntries[k]; bool found = false; unsigned h = 0; - for(; h < m_count; h++) + for(; h < m_histogram.GetCount(); h++) { if (m_histogram[h].m_mt == currentEntry) { @@ -71,32 +117,23 @@ Histogram::Histogram(const ICorJitInfo::ClassProfile* classProfile) if (!found) { - m_histogram[h].m_mt = currentEntry; - m_histogram[h].m_count = 1; - m_count++; + HistogramEntry newEntry; + newEntry.m_mt = currentEntry; + newEntry.m_count = 1; + m_histogram.Append(newEntry); } } - - // Zero the remainder - for (unsigned k = m_count; k < ICorJitInfo::ClassProfile::SIZE; k++) - { - m_histogram[k].m_mt = 0; - m_histogram[k].m_count = 0; - } } +PtrSHash PgoManager::s_textFormatPgoData; +CrstStatic PgoManager::s_pgoMgrLock; +PgoManager PgoManager::s_InitialPgoManager; + void PgoManager::Initialize() { - LIMITED_METHOD_CONTRACT; + STANDARD_VM_CONTRACT; - // If any PGO mode is active, allocate the slab - if ((CLRConfig::GetConfigValue(CLRConfig::INTERNAL_ReadPGOData) > 0) || - (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_WritePGOData) > 0) || - 
(CLRConfig::GetConfigValue(CLRConfig::INTERNAL_TieredPGO) > 0)) - { - s_PgoData = new ICorJitInfo::BlockCounts[BUFFER_SIZE]; - s_PgoIndex = 0; - } + s_pgoMgrLock.Init(CrstLeafLock, CRST_DEFAULT); // If we're reading in counts, do that now ReadPgoData(); @@ -109,8 +146,7 @@ void PgoManager::Shutdown() void PgoManager::VerifyAddress(void* address) { - _ASSERTE(address > s_PgoData); - _ASSERTE(address <= s_PgoData + BUFFER_SIZE); + // TODO Insert an assert to check that an address is a valid pgo address } void PgoManager::WritePgoData() @@ -120,7 +156,14 @@ void PgoManager::WritePgoData() return; } - if (s_PgoData == NULL) + int pgoDataCount = 0; + EnumeratePGOHeaders([&pgoDataCount](HeaderList *pgoData) + { + pgoDataCount++; + return true; + }); + + if (pgoDataCount == 0) { return; } @@ -138,106 +181,107 @@ void PgoManager::WritePgoData() return; } - fprintf(pgoDataFile, s_FileHeaderString, s_PgoIndex); - unsigned index = 0; - const unsigned maxIndex = s_PgoIndex; + fprintf(pgoDataFile, s_FileHeaderString, pgoDataCount); - while (index < maxIndex) + EnumeratePGOHeaders([pgoDataFile](HeaderList *pgoData) { - const Header* const header = (Header*)&s_PgoData[index]; - - if ((header->recordCount < MIN_RECORD_COUNT) || (header->recordCount > MAX_RECORD_COUNT)) + int32_t schemaItems; + if (!CountInstrumentationDataSize(pgoData->header.GetData(), pgoData->header.SchemaSizeMax(), &schemaItems)) { - fprintf(pgoDataFile, "Unreasonable record count %u at index %u\n", header->recordCount, index); - break; + _ASSERTE(!"Invalid instrumentation schema"); + return true; } - fprintf(pgoDataFile, s_MethodHeaderString, header->token, header->hash, header->ilSize, header->recordCount, index); + fprintf(pgoDataFile, s_MethodHeaderString, pgoData->header.codehash, pgoData->header.methodhash, pgoData->header.ilSize, schemaItems); - index += 2; + SString tClass, tMethodName, tMethodSignature; + pgoData->header.method->GetMethodInfo(tClass, tMethodName, tMethodSignature); - 
ICorJitInfo::BlockCounts* records = &s_PgoData[index]; - unsigned recordCount = header->recordCount - 2; - unsigned lastOffset = 0; - bool hasClassProfile = false; - unsigned i = 0; - - while (i < recordCount) - { - const unsigned thisOffset = records[i].ILOffset; - - - if ((thisOffset & ICorJitInfo::ClassProfile::CLASS_FLAG) != 0) - { - // remainder must be class probe data - hasClassProfile = true; - break; - } + StackScratchBuffer nameBuffer; + StackScratchBuffer nameBuffer2; + fprintf(pgoDataFile, "MethodName: %s.%s\n", tClass.GetUTF8(nameBuffer), tMethodName.GetUTF8(nameBuffer2)); + fprintf(pgoDataFile, "Signature: %s\n", tMethodSignature.GetUTF8(nameBuffer)); - lastOffset = thisOffset; - fprintf(pgoDataFile, s_RecordString, records[i].ILOffset, records[i].ExecutionCount); - i++; - } + uint8_t* data = pgoData->header.GetData(); - if (hasClassProfile) + unsigned lastOffset = 0; + if (!ReadInstrumentationDataWithLayout(pgoData->header.GetData(), pgoData->header.SchemaSizeMax(), pgoData->header.countsOffset, [data, pgoDataFile] (const ICorJitInfo::PgoInstrumentationSchema &schema) { - fflush(pgoDataFile); - - // Write out histogram of each probe's data. - // We currently don't expect to be able to read this back in. - // - while (i < recordCount) + fprintf(pgoDataFile, s_RecordString, schema.InstrumentationKind, schema.ILOffset, schema.Count, schema.Other); + for (int32_t iEntry = 0; iEntry < schema.Count; iEntry++) { - // Should be enough room left for a class profile. - _ASSERTE(i + sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts) <= recordCount); + size_t entryOffset = schema.Offset + iEntry * InstrumentationKindToSize(schema.InstrumentationKind); - const ICorJitInfo::ClassProfile* classProfile = (ICorJitInfo::ClassProfile*)&s_PgoData[i + index]; - - // Form a histogram... - // - Histogram h(classProfile); - - // And display... - // - // Figure out if this is a virtual or interface probe. 
- // - const char* profileType = "virtual"; - - if ((classProfile->ILOffset & ICorJitInfo::ClassProfile::INTERFACE_FLAG) != 0) + switch(schema.InstrumentationKind & ICorJitInfo::PgoInstrumentationKind::MarshalMask) { - profileType = "interface"; - } - - // "classProfile iloffs %u samples %u entries %u totalCount %u %s\n"; - // - fprintf(pgoDataFile, s_ClassProfileHeader, (classProfile->ILOffset & ICorJitInfo::ClassProfile::OFFSET_MASK), - classProfile->Count, h.m_count, h.m_totalCount, profileType); - - for (unsigned j = 0; j < h.m_count; j++) - { - CORINFO_CLASS_HANDLE clsHnd = h.m_histogram[j].m_mt; - const char* className = "n/a"; -#ifdef _DEBUG - TypeHandle typeHnd(clsHnd); - MethodTable* pMT = typeHnd.AsMethodTable(); - className = pMT->GetDebugClassName(); -#endif - fprintf(pgoDataFile, s_ClassProfileEntry, clsHnd, className, h.m_histogram[j].m_count); + case ICorJitInfo::PgoInstrumentationKind::None: + fprintf(pgoDataFile, s_None); + break; + case ICorJitInfo::PgoInstrumentationKind::FourByte: + fprintf(pgoDataFile, s_FourByte, (unsigned)*(uint32_t*)(data + entryOffset)); + break; + case ICorJitInfo::PgoInstrumentationKind::EightByte: + // Print a pair of 4 byte values as the PRIu64 specifier isn't generally avaialble + fprintf(pgoDataFile, s_EightByte, (unsigned)*(uint32_t*)(data + entryOffset), (unsigned)*(uint32_t*)(data + entryOffset + 4)); + break; + case ICorJitInfo::PgoInstrumentationKind::TypeHandle: + { + TypeHandle th = *(TypeHandle*)(data + entryOffset); + if (th.IsNull()) + { + fprintf(pgoDataFile, s_TypeHandle, "NULL"); + } + else + { + StackSString ss; + StackScratchBuffer nameBuffer; + TypeString::AppendType(ss, th, TypeString::FormatNamespace | TypeString::FormatFullInst | TypeString::FormatAssembly); + if (ss.GetCount() > 8192) + { + fprintf(pgoDataFile, s_TypeHandle, "unknown"); + } + else + { + fprintf(pgoDataFile, s_TypeHandle, ss.GetUTF8(nameBuffer)); + } + } + break; + } + default: + break; } - - // Advance to next entry. 
- // - i += sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts); } + return true; + } + )) + { + return true;; } - index += recordCount; - } + return true; + }); fprintf(pgoDataFile, s_FileTrailerString); fclose(pgoDataFile); } +void ReadLineAndDiscard(FILE* file) +{ + char buffer[255]; + while (fgets(buffer, sizeof(buffer), file) != NULL) + { + auto stringLen = strlen(buffer); + if (stringLen == 0) + return; + + if (buffer[stringLen - 1] == '\n') + { + return; + } + } +} + +#ifndef DACCESS_COMPILE void PgoManager::ReadPgoData() { // Skip, if we're not reading, or we're writing profile data, or doing tiered pgo @@ -249,13 +293,6 @@ void PgoManager::ReadPgoData() return; } - // PGO data slab should already be set up, if not, just bail - // - if (s_PgoData == NULL) - { - return; - } - CLRConfigStringHolder fileName(CLRConfig::GetConfigValue(CLRConfig::INTERNAL_PGODataPath)); if (fileName == NULL) @@ -270,7 +307,7 @@ void PgoManager::ReadPgoData() return; } - char buffer[256]; + char buffer[16384]; unsigned maxIndex = 0; // Header must be first line @@ -285,70 +322,44 @@ void PgoManager::ReadPgoData() return; } - // Sanity check data will fit into the slab - // - if ((maxIndex == 0) || (maxIndex >= MAX_RECORD_COUNT)) - { - return; - } // Fill in the data // - unsigned index = 0; unsigned methods = 0; unsigned probes = 0; - bool failed = false; - while (!failed) + + while (true) // Read till the file is empty { if (fgets(buffer, sizeof(buffer), pgoDataFile) == nullptr) { break; } + bool failed = false; + // Find the next method entry line // - unsigned recordCount = 0; - unsigned token = 0; - unsigned hash = 0; + unsigned schemaCount = 0; + unsigned codehash = 0; + unsigned methodhash = 0; unsigned ilSize = 0; - unsigned rIndex = 0; - if (sscanf_s(buffer, s_MethodHeaderString, &token, &hash, &ilSize, &recordCount, &rIndex) != 5) + if (sscanf_s(buffer, s_MethodHeaderString, &codehash, &methodhash, &ilSize, &schemaCount) != 4) { continue; } - 
_ASSERTE(index == rIndex); - methods++; - - // If there's not enough room left, bail - if ((index + recordCount) > maxIndex) - { - failed = true; - break; - } - - Header* const header = (Header*)&s_PgoData[index]; + // Discard the next two lines that hold the string name of the method + ReadLineAndDiscard(pgoDataFile); + ReadLineAndDiscard(pgoDataFile); - header->recordCount = recordCount; - header->token = token; - header->hash = hash; - header->ilSize = ilSize; + StackSArray schemaElements; + StackSArray methodInstrumentationData; + schemaElements.Preallocate((int)schemaCount); + ICorJitInfo::PgoInstrumentationSchema lastSchema = {}; - // Sanity check - // - if ((recordCount < MIN_RECORD_COUNT) || (recordCount > MAX_RECORD_COUNT)) - { - failed = true; - break; - } - - index += 2; - - // Read il data - // - for (unsigned i = 0; i < recordCount - 2; i++) + for (unsigned i = 0; i < schemaCount; i++) { if (fgets(buffer, sizeof(buffer), pgoDataFile) == nullptr) { @@ -356,135 +367,471 @@ void PgoManager::ReadPgoData() break; } - if (sscanf_s(buffer, s_RecordString, &s_PgoData[index].ILOffset, &s_PgoData[index].ExecutionCount) != 2) + // Read schema + ICorJitInfo::PgoInstrumentationSchema schema; + + if (sscanf_s(buffer, s_RecordString, &schema.InstrumentationKind, &schema.ILOffset, &schema.Count, &schema.Other) != 4) + { + failed = true; + break; + } + + LayoutPgoInstrumentationSchema(lastSchema, &schema); + schemaElements[i] = schema; + COUNT_T entrySize = InstrumentationKindToSize(schema.InstrumentationKind); + COUNT_T maxSize = entrySize * schema.Count + (COUNT_T)schema.Offset; + methodInstrumentationData.SetCount(maxSize); + + for (int32_t iEntry = 0; !failed && iEntry < schema.Count; iEntry++) { - // This might be class profile data; if so just skip it. 
- // - if (strstr(buffer, "class") != buffer) + size_t entryOffset = schema.Offset + iEntry * entrySize; + if (fgets(buffer, sizeof(buffer), pgoDataFile) == nullptr) { failed = true; break; } + + switch(schema.InstrumentationKind & ICorJitInfo::PgoInstrumentationKind::MarshalMask) + { + case ICorJitInfo::PgoInstrumentationKind::None: + if (sscanf_s(buffer, s_None) != 0) + { + failed = true; + } + break; + case ICorJitInfo::PgoInstrumentationKind::FourByte: + { + unsigned val; + if (sscanf_s(buffer, s_FourByte, &val) != 1) + { + failed = true; + } + else + { + uint8_t *rawBuffer = methodInstrumentationData.OpenRawBuffer(maxSize); + *(uint32_t *)(rawBuffer + entryOffset) = (uint32_t)val; + methodInstrumentationData.CloseRawBuffer(); + } + } + break; + case ICorJitInfo::PgoInstrumentationKind::EightByte: + { + // Print a pair of 4 byte values as the PRIu64 specifier isn't generally avaialble + unsigned val, val2; + if (sscanf_s(buffer, s_EightByte, &val, &val2) != 2) + { + failed = true; + } + else + { + uint8_t *rawBuffer = methodInstrumentationData.OpenRawBuffer(maxSize); + *(uint32_t *)(rawBuffer + entryOffset) = (uint32_t)val; + *(uint32_t *)(rawBuffer + entryOffset + 4) = (uint32_t)val2; + methodInstrumentationData.CloseRawBuffer(); + } + } + break; + case ICorJitInfo::PgoInstrumentationKind::TypeHandle: + { + char* typeString; + if (strncmp(buffer, "TypeHandle: ", 12) != 0) + { + failed = true; + break; + } + typeString = buffer + 12; + size_t endOfString = strlen(typeString); + if (endOfString == 0 || (typeString[endOfString - 1] != '\n')) + { + failed = true; + break; + } + // Remove \n and replace will null + typeString[endOfString - 1] = '\0'; + + TypeHandle th; + INT_PTR ptrVal = 0; + if (strcmp(typeString, "NULL") != 0) + { + // As early type loading is likely problematic, simply drop the string into the data, and fix it up later + void* tempString = malloc(endOfString); + memcpy(tempString, typeString, endOfString); + + ptrVal = (INT_PTR)tempString; + 
ptrVal += 1; // Set low bit to indicate that this isn't actually a TypeHandle, but is instead a pointer + } + + uint8_t *rawBuffer = methodInstrumentationData.OpenRawBuffer(maxSize); + *(INT_PTR *)(rawBuffer + entryOffset) = ptrVal; + methodInstrumentationData.CloseRawBuffer(); + break; + } + default: + break; + } } - index++; + if (failed) + break; + + lastSchema = schema; + } + + if (failed) + continue; + + methods++; + + UINT offsetOfActualInstrumentationData; + HRESULT hr = ComputeOffsetOfActualInstrumentationData(schemaElements.GetElements(), schemaCount, sizeof(Header), &offsetOfActualInstrumentationData); + if (FAILED(hr)) + { + continue; + } + UINT offsetOfInstrumentationDataFromStartOfDataRegion = offsetOfActualInstrumentationData - sizeof(Header); + + // Adjust schema offsets to account for embedding the instrumentation schema in front of the data + for (unsigned iSchema = 0; iSchema < schemaCount; iSchema++) + { + schemaElements[iSchema].Offset += offsetOfInstrumentationDataFromStartOfDataRegion; + } + + S_SIZE_T allocationSize = S_SIZE_T(offsetOfActualInstrumentationData) + S_SIZE_T(methodInstrumentationData.GetCount()); + if (allocationSize.IsOverflow()) + { + _ASSERTE(!"Unexpected overflow"); + return; + } + + Header* methodData = (Header*)malloc(allocationSize.Value()); + methodData->HashInit(methodhash, codehash, ilSize, offsetOfInstrumentationDataFromStartOfDataRegion); + + if (!WriteInstrumentationSchema(schemaElements.GetElements(), schemaCount, methodData->GetData(), offsetOfInstrumentationDataFromStartOfDataRegion)) + { + _ASSERTE(!"Unable to write schema"); + return; } - probes += recordCount - 2; + methodInstrumentationData.Copy(((uint8_t*)methodData) + offsetOfActualInstrumentationData, methodInstrumentationData.Begin(), methodInstrumentationData.GetCount()); + + s_textFormatPgoData.Add(methodData); + probes += schemaCount; } +} +#endif // DACCESS_COMPILE + +void PgoManager::CreatePgoManager(PgoManager* volatile* ppMgr, bool loaderAllocator) 
+{ + CrstHolder lock(&s_pgoMgrLock); + if (*ppMgr != NULL) + return; - s_PgoIndex = maxIndex; + PgoManager* newManager; + if (loaderAllocator) + newManager = new LoaderAllocatorPgoManager(); + else + newManager = new PgoManager(); + + VolatileStore((PgoManager**)ppMgr, newManager); } -HRESULT PgoManager::allocMethodBlockCounts(MethodDesc* pMD, UINT32 count, - ICorJitInfo::BlockCounts** pBlockCounts, unsigned ilSize) +void PgoManager::Header::Init(MethodDesc *pMD, unsigned codehash, unsigned ilSize, unsigned countsOffset) { - // Initialize our out param - *pBlockCounts = NULL; + this->codehash = codehash; + this->methodhash = pMD->GetStableHash(); + this->ilSize = ilSize; + this->method = pMD; + this->countsOffset = countsOffset; +} + +HRESULT PgoManager::allocPgoInstrumentationBySchema(MethodDesc* pMD, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData) +{ + STANDARD_VM_CONTRACT; - if (s_PgoData == nullptr) + PgoManager* mgr; + if (!pMD->IsDynamicMethod()) { - return E_NOTIMPL; + mgr = pMD->GetLoaderAllocator()->GetOrCreatePgoManager(); } - - unsigned methodIndex = 0; - unsigned recordCount = count + 2; - - // Look for space in the profile buffer for this method. - // Note other jit invocations may be vying for space concurrently. - // - while (true) + else { - const unsigned oldIndex = s_PgoIndex; - const unsigned newIndex = oldIndex + recordCount; - - // If there is no room left for this method, - // that's ok, we just won't profile this method. 
- // - if (newIndex >= BUFFER_SIZE) + PgoManager* volatile* ppMgr = pMD->AsDynamicMethodDesc()->GetResolver()->GetDynamicPgoManagerPointer(); + if (ppMgr == NULL) { return E_NOTIMPL; } - const unsigned updatedIndex = InterlockedCompareExchangeT(&s_PgoIndex, newIndex, oldIndex); + CreatePgoManager(ppMgr, false); + mgr = *ppMgr; + } - if (updatedIndex == oldIndex) - { - // Found space - methodIndex = oldIndex; - break; - } + if (mgr == NULL) + { + return E_NOTIMPL; + } + + return mgr->allocPgoInstrumentationBySchemaInstance(pMD, pSchema, countSchemaItems, pInstrumentationData); +} + +HRESULT PgoManager::ComputeOffsetOfActualInstrumentationData(const ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, size_t headerInitialSize, UINT *offsetOfActualInstrumentationData) +{ + // Determine size of compressed schema representation + size_t headerSize = headerInitialSize; + if (!WriteInstrumentationToBytes(pSchema, countSchemaItems, [&headerSize](uint8_t byte) { headerSize = headerSize + 1; return true; })) + { + return E_NOTIMPL; } - // Fill in the header - Header* const header = (Header*)&s_PgoData[methodIndex]; - header->recordCount = recordCount; - header->token = pMD->IsDynamicMethod() ? 
0 : pMD->GetMemberDef(); - header->hash = pMD->GetStableHash(); - header->ilSize = ilSize; + // Determine alignment of instrumentation data + UINT maxAlign = 0; + for (UINT32 iSchema = 0; iSchema < countSchemaItems; iSchema++) + { + maxAlign = max(InstrumentationKindToAlignment(pSchema[iSchema].InstrumentationKind), maxAlign); + } - // Return pointer to start of count records - *pBlockCounts = &s_PgoData[methodIndex + 2]; + *offsetOfActualInstrumentationData = (UINT)AlignUp(headerSize, maxAlign); return S_OK; } -HRESULT PgoManager::getMethodBlockCounts(MethodDesc* pMD, unsigned ilSize, UINT32* pCount, - ICorJitInfo::BlockCounts** pBlockCounts, UINT32* pNumRuns) +HRESULT PgoManager::allocPgoInstrumentationBySchemaInstance(MethodDesc* pMD, + ICorJitInfo::PgoInstrumentationSchema* pSchema, + UINT32 countSchemaItems, + BYTE** pInstrumentationData) { - // Initialize our out params - *pCount = 0; - *pBlockCounts = NULL; - *pNumRuns = 0; - - // Bail if there's no profile data. - // - if (s_PgoData == NULL) + // Initialize our out param + *pInstrumentationData = NULL; + int codehash; + unsigned ilSize; + if (!GetVersionResilientILCodeHashCode(pMD, &codehash, &ilSize)) { return E_NOTIMPL; } - // See if we can find counts for this method in the profile buffer. - // - const unsigned maxIndex = s_PgoIndex; - const unsigned token = pMD->IsDynamicMethod() ? 
0 : pMD->GetMemberDef(); - const unsigned hash = pMD->GetStableHash(); + UINT offsetOfActualInstrumentationData; + HRESULT hr = ComputeOffsetOfActualInstrumentationData(pSchema, countSchemaItems, sizeof(HeaderList), &offsetOfActualInstrumentationData); + UINT offsetOfInstrumentationDataFromStartOfDataRegion = offsetOfActualInstrumentationData - sizeof(HeaderList); + if (FAILED(hr)) + { + return hr; + } + // Compute offsets for each instrumentation entry + ICorJitInfo::PgoInstrumentationSchema prevSchema; + memset(&prevSchema, 0, sizeof(ICorJitInfo::PgoInstrumentationSchema)); + prevSchema.Offset = offsetOfInstrumentationDataFromStartOfDataRegion; + for (UINT32 iSchema = 0; iSchema < countSchemaItems; iSchema++) + { + LayoutPgoInstrumentationSchema(prevSchema, &pSchema[iSchema]); + prevSchema = pSchema[iSchema]; + } - unsigned index = 0; - unsigned methodsChecked = 0; + S_SIZE_T allocationSize = S_SIZE_T(sizeof(HeaderList)) + S_SIZE_T(pSchema[countSchemaItems - 1].Offset + InstrumentationKindToSize(pSchema[countSchemaItems - 1].InstrumentationKind)); + + if (allocationSize.IsOverflow()) + { + return E_NOTIMPL; + } + size_t unsafeAllocationSize = allocationSize.Value(); + HeaderList* pHeaderList = NULL; - while (index < maxIndex) + if (pMD->IsDynamicMethod()) { - // The first two "records" of each entry are actually header data - // to identify the method. - // - Header* const header = (Header*)&s_PgoData[index]; + HeaderList *currentHeaderList = m_pgoHeaders; + if (currentHeaderList != NULL) + { + if (!ComparePgoSchemaEquals(currentHeaderList->header.GetData(), currentHeaderList->header.countsOffset, pSchema, countSchemaItems)) + { + return E_NOTIMPL; + } + _ASSERTE(currentHeaderList->header.method == pMD); + *pInstrumentationData = currentHeaderList->header.GetData(); + return S_OK; + } - // Sanity check that header data looks reasonable. If not, just - // fail the lookup. 
- // - if ((header->recordCount < MIN_RECORD_COUNT) || (header->recordCount > MAX_RECORD_COUNT)) + pHeaderList = (HeaderList*)pMD->AsDynamicMethodDesc()->GetResolver()->GetJitMetaHeap()->New(unsafeAllocationSize); + + memset(pHeaderList, 0, unsafeAllocationSize); + pHeaderList->header.Init(pMD, codehash, ilSize, offsetOfInstrumentationDataFromStartOfDataRegion); + *pInstrumentationData = pHeaderList->header.GetData(); + if (!WriteInstrumentationSchema(pSchema, countSchemaItems, *pInstrumentationData, pHeaderList->header.countsOffset)) { - break; + _ASSERTE(!"Unable to write schema"); + return E_NOTIMPL; } + m_pgoHeaders = pHeaderList; + return S_OK; + } + else + { + LoaderAllocatorPgoManager *laPgoManagerThis = (LoaderAllocatorPgoManager *)this; + CrstHolder lock(&laPgoManagerThis->m_lock); - // See if the header info matches the current method. - // - if ((header->token == token) && (header->hash == hash) && (header->ilSize == ilSize)) + HeaderList* existingData = laPgoManagerThis->m_pgoDataLookup.Lookup(pMD); + if (existingData != NULL) { - // Yep, found data. 
- // - *pBlockCounts = &s_PgoData[index + 2]; - *pCount = header->recordCount - 2; - *pNumRuns = 1; + if (!ComparePgoSchemaEquals(existingData->header.GetData(), existingData->header.countsOffset, pSchema, countSchemaItems)) + { + return E_NOTIMPL; + } + *pInstrumentationData = existingData->header.GetData(); return S_OK; } - index += header->recordCount; - methodsChecked++; + AllocMemTracker loaderHeapAllocation; + pHeaderList = (HeaderList*)loaderHeapAllocation.Track(pMD->GetLoaderAllocator()->GetHighFrequencyHeap()->AllocMem(allocationSize)); + memset(pHeaderList, 0, unsafeAllocationSize); + pHeaderList->header.Init(pMD, codehash, ilSize, offsetOfInstrumentationDataFromStartOfDataRegion); + pHeaderList->next = m_pgoHeaders; + *pInstrumentationData = pHeaderList->header.GetData(); + if (!WriteInstrumentationSchema(pSchema, countSchemaItems, *pInstrumentationData, pHeaderList->header.countsOffset)) + { + _ASSERTE(!"Unable to write schema"); + return E_NOTIMPL; + } + laPgoManagerThis->m_pgoDataLookup.Add(pHeaderList); + loaderHeapAllocation.SuppressRelease(); + m_pgoHeaders = pHeaderList; + return S_OK; + } +} + +#ifndef DACCESS_COMPILE +HRESULT PgoManager::getPgoInstrumentationResults(MethodDesc* pMD, SArray* pSchema, BYTE**pInstrumentationData) +{ + // Initialize our out params + pSchema->Clear(); + *pInstrumentationData = NULL; + + PgoManager *mgr; + if (!pMD->IsDynamicMethod()) + { + mgr = pMD->GetLoaderAllocator()->GetPgoManager(); + } + else + { + mgr = pMD->AsDynamicMethodDesc()->GetResolver()->GetDynamicPgoManager(); + } + + HRESULT hr = E_NOTIMPL; + if (mgr != NULL) + { + hr = mgr->getPgoInstrumentationResultsInstance(pMD, pSchema, pInstrumentationData); } - return E_NOTIMPL; + // If not found in the data from the current run, look in the data from the text file + if (FAILED(hr) && s_textFormatPgoData.GetCount() > 0) + { + COUNT_T methodhash = pMD->GetStableHash(); + int codehash; + unsigned ilSize; + if (GetVersionResilientILCodeHashCode(pMD, &codehash, 
&ilSize)) + { + Header *found = s_textFormatPgoData.Lookup(CodeAndMethodHash(codehash, methodhash)); + if (found != NULL) + { + if (ReadInstrumentationDataWithLayoutIntoSArray(found->GetData(), found->countsOffset, found->countsOffset, pSchema)) + { + EX_TRY + { + // TypeHandles can't reliably be loaded at ReadPGO time + // Instead, translate them before leaving this method. + // The ReadPgo method will place pointers to C style null + // terminated strings in the TypeHandle slots, and this will + // translate any of those into loaded TypeHandles as appropriate + + for (unsigned iSchema = 0; iSchema < pSchema->GetCount(); iSchema++) + { + ICorJitInfo::PgoInstrumentationSchema *schema = &(*pSchema)[iSchema]; + if ((schema->InstrumentationKind & ICorJitInfo::PgoInstrumentationKind::MarshalMask) == ICorJitInfo::PgoInstrumentationKind::TypeHandle) + { + for (int iEntry = 0; iEntry < schema->Count; iEntry++) + { + INT_PTR* typeHandleValueAddress = (INT_PTR*)(found->GetData() + schema->Offset + iEntry * InstrumentationKindToSize(schema->InstrumentationKind)); + INT_PTR initialTypeHandleValue = VolatileLoad(typeHandleValueAddress); + if (((initialTypeHandleValue & 1) == 1) && !IsUnknownTypeHandle(initialTypeHandleValue)) + { + INT_PTR newPtr = 0; + TypeHandle th; + char* typeString = ((char *)initialTypeHandleValue) - 1; + + // Don't attempt to load any types until the EE is started + if (g_fEEStarted) + { + StackSString ss(SString::Utf8, typeString); + th = TypeName::GetTypeManaged(ss.GetUnicode(), NULL, FALSE, FALSE, FALSE, NULL, NULL); + } + + if (th.IsNull()) + { + newPtr = HashToPgoUnknownTypeHandle(HashStringA(typeString)); + } + else + { + newPtr = (INT_PTR)th.AsPtr(); + } + InterlockedCompareExchangeT(typeHandleValueAddress, newPtr, initialTypeHandleValue); + } + } + } + } + + *pInstrumentationData = found->GetData(); + hr = S_OK; + } + EX_CATCH + { + hr = E_FAIL; + } + EX_END_CATCH(RethrowTerminalExceptions) + + } + else + { + _ASSERTE(!"Unable to parse schema 
data"); + hr = E_NOTIMPL; + } + } + } + } + + return hr; +} +#endif // DACCESS_COMPILE + +HRESULT PgoManager::getPgoInstrumentationResultsInstance(MethodDesc* pMD, SArray* pSchema, BYTE**pInstrumentationData) +{ + // Initialize our out params + pSchema->Clear(); + *pInstrumentationData = NULL; + + HeaderList *found; + + if (pMD->IsDynamicMethod()) + { + found = m_pgoHeaders; + } + else + { + LoaderAllocatorPgoManager *laPgoManagerThis = (LoaderAllocatorPgoManager *)this; + CrstHolder lock(&laPgoManagerThis->m_lock); + found = laPgoManagerThis->m_pgoDataLookup.Lookup(pMD); + } + + if (found == NULL) + { + return E_NOTIMPL; + } + + if (ReadInstrumentationDataWithLayoutIntoSArray(found->header.GetData(), found->header.countsOffset, found->header.countsOffset, pSchema)) + { + *pInstrumentationData = found->header.GetData(); + return S_OK; + } + else + { + _ASSERTE(!"Unable to parse schema data"); + return E_NOTIMPL; + } } // See if there is a class profile for this method at the indicated il Offset. @@ -498,157 +845,100 @@ CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize *pLikelihood = 0; *pNumberOfClasses = 0; - // Bail if there's no profile data. + StackSArray schema; + BYTE* pInstrumentationData; + HRESULT hr = getPgoInstrumentationResults(pMD, &schema, &pInstrumentationData); + + // Failed to find any sort of profile data for this method // - if (s_PgoData == NULL) + if (FAILED(hr)) { return NULL; } - // See if we can find profile data for this method in the profile buffer. - // - const unsigned maxIndex = s_PgoIndex; - const unsigned token = pMD->IsDynamicMethod() ? 0 : pMD->GetMemberDef(); - const unsigned hash = pMD->GetStableHash(); - - unsigned index = 0; - unsigned methodsChecked = 0; - - while (index < maxIndex) + // TODO This logic should be moved to the JIT + for (COUNT_T i = 0; i < schema.GetCount(); i++) { - // The first two "records" of each entry are actually header data - // to identify the method. 
- // - Header* const header = (Header*)&s_PgoData[index]; - - // Sanity check that header data looks reasonable. If not, just - // fail the lookup. - // - if ((header->recordCount < MIN_RECORD_COUNT) || (header->recordCount > MAX_RECORD_COUNT)) - { - break; - } + if (schema[i].ILOffset != (int32_t)ilOffset) + continue; - // See if the header info matches the current method. - // - if ((header->token == token) && (header->hash == hash) && (header->ilSize == ilSize)) + if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount) && + (schema[i].Count == 1) && + ((i + 1) < schema.GetCount()) && + (schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramTypeHandle)) { - // Yep, found data. See if there is a suitable class profile. + // Form a histogram // - // This bit is currently somewhat hacky ... we scan the records, the count records come - // first and are in increasing IL offset order. Class profiles have inverted IL offsets - // so when we find an offset with high bit set, it's going to be an class profile. - // - unsigned countILOffset = 0; - unsigned j = 2; + Histogram h(*(uint32_t*)(pInstrumentationData + schema[i].Offset), (INT_PTR*)(pInstrumentationData + schema[i + 1].Offset), schema[i + 1].Count); - // Skip past all the count entries + // Use histogram count as number of classes estimate // - while (j < header->recordCount) + *pNumberOfClasses = h.m_histogram.GetCount() + h.m_unknownTypes; + + // Report back what we've learned + // (perhaps, use count to augment likelihood?) + // + switch (*pNumberOfClasses) { - if ((s_PgoData[index + j].ILOffset & ICorJitInfo::ClassProfile::CLASS_FLAG) != 0) + case 0: { - break; + return NULL; } + break; - countILOffset = s_PgoData[index + j].ILOffset; - j++; - } - - // Now we're in the "class profile" portion of the slab for this method. - // Look for the one that has the right IL offset. 
- // - while (j < header->recordCount) - { - const ICorJitInfo::ClassProfile* const classProfile = (ICorJitInfo::ClassProfile*)&s_PgoData[index + j]; - - if ((classProfile->ILOffset & ICorJitInfo::ClassProfile::OFFSET_MASK) != ilOffset) + case 1: { - // Need to make sure this is even divisor - // - j += sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts); - continue; + *pLikelihood = 100; + return h.m_histogram[0].m_mt; } + break; - // Form a histogram - // - Histogram h(classProfile); - - // Use histogram count as number of classes estimate - // - *pNumberOfClasses = h.m_count; - - // Report back what we've learned - // (perhaps, use count to augment likelihood?) - // - switch (h.m_count) + case 2: { - case 0: + if (h.m_histogram[0].m_count >= h.m_histogram[1].m_count) { - return NULL; + *pLikelihood = (100 * h.m_histogram[0].m_count) / h.m_totalCount; + return h.m_histogram[0].m_mt; } - break; - - case 1: + else { - *pLikelihood = 100; - return h.m_histogram[0].m_mt; + *pLikelihood = (100 * h.m_histogram[1].m_count) / h.m_totalCount; + return h.m_histogram[1].m_mt; } - break; + } + break; - case 2: + default: + { + // Find maximum entry and return it + // + unsigned maxIndex = 0; + unsigned maxCount = 0; + + for (unsigned m = 0; m < h.m_histogram.GetCount(); m++) { - if (h.m_histogram[0].m_count >= h.m_histogram[1].m_count) - { - *pLikelihood = (100 * h.m_histogram[0].m_count) / h.m_totalCount; - return h.m_histogram[0].m_mt; - } - else + if (h.m_histogram[m].m_count > maxCount) { - *pLikelihood = (100 * h.m_histogram[1].m_count) / h.m_totalCount; - return h.m_histogram[1].m_mt; + maxIndex = m; + maxCount = h.m_histogram[m].m_count; } } - break; - default: + if (maxCount > 0) { - // Find maximum entry and return it - // - unsigned maxIndex = 0; - unsigned maxCount = 0; - - for (unsigned m = 0; m < h.m_count; m++) - { - if (h.m_histogram[m].m_count > maxCount) - { - maxIndex = m; - maxCount = h.m_histogram[m].m_count; - } - } - - if (maxCount > 0) 
- { - *pLikelihood = (100 * maxCount) / h.m_totalCount; - return h.m_histogram[maxIndex].m_mt; - } - - return NULL; + *pLikelihood = (100 * maxCount) / h.m_totalCount; + return h.m_histogram[maxIndex].m_mt; } - break; + + return NULL; } + break; } - // Failed to find a class profile entry - // - return NULL; } - - index += header->recordCount; - methodsChecked++; } - // Failed to find any sort of profile data for this method + // Failed to find histogram data for this method // return NULL; } @@ -657,24 +947,24 @@ CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize // Stub version for !FEATURE_PGO builds // -HRESULT PgoManager::allocMethodBlockCounts(MethodDesc* pMD, UINT32 count, - ICorJitInfo::BlockCounts** pBlockCounts, unsigned ilSize) +HRESULT PgoManager::allocPgoInstrumentationBySchema(MethodDesc* pMD, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData) { - pBlockCounts = NULL; + *pInstrumentationData = NULL; return E_NOTIMPL; } // Stub version for !FEATURE_PGO builds // -HRESULT PgoManager::getMethodBlockCounts(MethodDesc* pMD, unsigned ilSize, UINT32* pCount, - ICorJitInfo::BlockCounts** pBlockCounts, UINT32* pNumRuns) +HRESULT PgoManager::getPgoInstrumentationResults(MethodDesc* pMD, SArray* pSchema, BYTE**pInstrumentationData) { - pBlockCounts = NULL; - pCount = 0; - pNumRuns = 0; + *pInstrumentationData = NULL; return E_NOTIMPL; } +void PgoManager::VerifyAddress(void* address) +{ +} + // Stub version for !FEATURE_PGO builds // CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize, unsigned ilOffset) @@ -682,4 +972,9 @@ CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize return NULL; } +void PgoManager::CreatePgoManager(PgoManager** ppMgr, bool loaderAllocator) +{ + *ppMgr = NULL; +} + #endif // FEATURE_PGO diff --git a/src/coreclr/vm/pgo.h b/src/coreclr/vm/pgo.h index 3ba4ab6ddb871..ffb43af7b8e7f 100644 --- 
a/src/coreclr/vm/pgo.h +++ b/src/coreclr/vm/pgo.h @@ -3,6 +3,9 @@ #ifndef PGO_H #define PGO_H +#include "typehashingalgorithms.h" +#include "shash.h" + // PgoManager handles in-process and out of band profile data for jitted code. class PgoManager @@ -18,13 +21,10 @@ class PgoManager public: - // Allocate a profile block count buffer for a method - static HRESULT allocMethodBlockCounts(MethodDesc* pMD, UINT32 count, - ICorJitInfo::BlockCounts** pBlockCounts, unsigned ilSize); + static HRESULT getPgoInstrumentationResults(MethodDesc* pMD, SArray* pSchema, BYTE**pInstrumentationData); + static HRESULT allocPgoInstrumentationBySchema(MethodDesc* pMD, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData); - // Retrieve the profile block count buffer for a method - static HRESULT getMethodBlockCounts(MethodDesc* pMD, unsigned ilSize, UINT32* pCount, - ICorJitInfo::BlockCounts** pBlockCounts, UINT32* pNumRuns); + static void CreatePgoManager(PgoManager* volatile* ppPgoManager, bool loaderAllocator); // Retrieve the most likely class for a particular call static CORINFO_CLASS_HANDLE getLikelyClass(MethodDesc* pMD, unsigned ilSize, unsigned ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses); @@ -33,49 +33,184 @@ class PgoManager static void VerifyAddress(void* address); #ifdef FEATURE_PGO - -private: - - enum + PgoManager() { - // Number of ICorJitInfo::BlockCount records in the global slab. - // Currently 4MB for a 64 bit system. 
- // - BUFFER_SIZE = 8 * 64 * 1024, - MIN_RECORD_COUNT = 3, - MAX_RECORD_COUNT = BUFFER_SIZE + if (this != &s_InitialPgoManager) + { + _ASSERTE(s_pgoMgrLock.OwnedByCurrentThread()); + m_next = s_InitialPgoManager.m_next; + m_prev = &s_InitialPgoManager; + s_InitialPgoManager.m_next = this; + } + else + { + m_next = NULL; + m_prev = NULL; + } + } + + virtual ~PgoManager() + { + if (this != &s_InitialPgoManager) + { + CrstHolder holder(&s_pgoMgrLock); + m_prev->m_next = m_next; + m_next->m_prev = m_prev; + } + } + + struct CodeAndMethodHash + { + CodeAndMethodHash(unsigned codehash, unsigned methodhash) : + m_codehash(codehash), + m_methodhash(methodhash) + {} + + const unsigned m_codehash; + const unsigned m_methodhash; + + COUNT_T Hash() const + { + return CombineTwoValuesIntoHash(m_codehash, m_methodhash); + } + + bool operator==(CodeAndMethodHash other) + { + return m_codehash == other.m_codehash && m_methodhash == other.m_methodhash; + } }; struct Header { - unsigned recordCount; - unsigned token; - unsigned hash; + MethodDesc *method; + unsigned codehash; + unsigned methodhash; unsigned ilSize; + unsigned countsOffset; + + CodeAndMethodHash GetKey() const + { + return CodeAndMethodHash(codehash, methodhash); + } + + static COUNT_T Hash(CodeAndMethodHash hashpair) + { + return hashpair.Hash(); + } + + void Init(MethodDesc *pMD, unsigned codehash, unsigned ilSize, unsigned countsOffset); + void HashInit(unsigned methodhash, unsigned codehash, unsigned ilSize, unsigned countsOffset) + { + method = NULL; + this->codehash = codehash; + this->methodhash = methodhash; + this->ilSize = ilSize; + this->countsOffset = countsOffset; + } + + uint8_t* GetData() const + { + return (uint8_t*)(this + 1); + } + + size_t SchemaSizeMax() const + { + return this->countsOffset; + } }; + struct HeaderList + { + HeaderList* next; + Header header; + + MethodDesc* GetKey() const + { + return header.method; + } + static COUNT_T Hash(MethodDesc *ptr) + { + return MixPointerIntoHash(ptr); 
+ } + }; + +protected: + + HRESULT getPgoInstrumentationResultsInstance(MethodDesc* pMD, + SArray* pSchema, + BYTE**pInstrumentationData); + + HRESULT allocPgoInstrumentationBySchemaInstance(MethodDesc* pMD, + ICorJitInfo::PgoInstrumentationSchema* pSchema, + UINT32 countSchemaItems, + BYTE** pInstrumentationData); + private: + static HRESULT ComputeOffsetOfActualInstrumentationData(const ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, size_t headerInitialSize, UINT *offsetOfActualInstrumentationData); static void ReadPgoData(); static void WritePgoData(); private: - // Global slab holding all pgo data - static ICorJitInfo::BlockCounts* s_PgoData; - - // Index of next free entry in the global slab - static unsigned volatile s_PgoIndex; - // Formatting strings for file input/output static const char* const s_FileHeaderString; static const char* const s_FileTrailerString; static const char* const s_MethodHeaderString; static const char* const s_RecordString; - static const char* const s_ClassProfileHeader; - static const char* const s_ClassProfileEntry; + static const char* const s_None; + static const char* const s_FourByte; + static const char* const s_EightByte; + static const char* const s_TypeHandle; + + static CrstStatic s_pgoMgrLock; + static PgoManager s_InitialPgoManager; + + static PtrSHash s_textFormatPgoData; + + PgoManager *m_next = NULL; + PgoManager *m_prev = NULL; + HeaderList *m_pgoHeaders = NULL; + + template + static bool EnumeratePGOHeaders(lambda l) + { + CrstHolder lock(&s_pgoMgrLock); + PgoManager *mgrCurrent = s_InitialPgoManager.m_next; + while (mgrCurrent != NULL) + { + HeaderList *pgoHeaderCur = mgrCurrent->m_pgoHeaders; + while (pgoHeaderCur != NULL) + { + if (!l(pgoHeaderCur)) + { + return false; + } + pgoHeaderCur = pgoHeaderCur->next; + } + mgrCurrent = mgrCurrent->m_next; + } + + return true; + } #endif // FEATURE_PGO }; +#ifdef FEATURE_PGO +class LoaderAllocatorPgoManager : public PgoManager +{ + friend class 
PgoManager; + Crst m_lock; + PtrSHash m_pgoDataLookup; + + public: + LoaderAllocatorPgoManager() : + m_lock(CrstPgoData, CRST_DEFAULT) + {} + + virtual ~LoaderAllocatorPgoManager(){} +}; +#endif // FEATURE_PGO + #endif // PGO_H diff --git a/src/coreclr/vm/typehashingalgorithms.h b/src/coreclr/vm/typehashingalgorithms.h index f2bd3bd2ee4a2..6e393ed85cb83 100644 --- a/src/coreclr/vm/typehashingalgorithms.h +++ b/src/coreclr/vm/typehashingalgorithms.h @@ -162,11 +162,21 @@ inline static UINT32 XXHash32_MixEmptyState() return 374761393U; // Prime5 } +inline static UINT32 XXHash32_MixState(UINT32 v1, UINT32 v2, UINT32 v3, UINT32 v4) +{ + return (UINT32)_rotl(v1, 1) + (UINT32)_rotl(v2, 7) + (UINT32)_rotl(v3, 12) + (UINT32)_rotl(v4, 18); +} + inline static UINT32 XXHash32_QueueRound(UINT32 hash, UINT32 queuedValue) { return ((UINT32)_rotl((int)(hash + queuedValue * 3266489917U/*Prime3*/), 17)) * 668265263U/*Prime4*/; } +inline static UINT32 XXHash32_Round(UINT32 hash, UINT32 input) +{ + return ((UINT32)_rotl((int)(hash + input * 2246822519U/*Prime2*/), 13)) * 2654435761U/*Prime1*/; +} + inline static UINT32 XXHash32_MixFinal(UINT32 hash) { hash ^= hash >> 15; @@ -177,6 +187,24 @@ inline static UINT32 XXHash32_MixFinal(UINT32 hash) return hash; } +inline static UINT32 MixOneValueIntoHash(UINT32 value1) +{ + // This matches the behavior of System.HashCode.Combine(value1) as of the time of authoring + + // Provide a way of diffusing bits from something with a limited + // input hash space. For example, many enums only have a few + // possible hashes, only using the bottom few bits of the code. Some + // collections are built on the assumption that hashes are spread + // over a larger space, so diffusing the bits may help the + // collection work more efficiently. 
+ + DWORD hash = XXHash32_MixEmptyState(); + hash += 4; + hash = XXHash32_QueueRound(hash, value1); + hash = XXHash32_MixFinal(hash); + return hash; +} + inline static UINT32 CombineTwoValuesIntoHash(UINT32 value1, UINT32 value2) { // This matches the behavior of System.HashCode.Combine(value1, value2) as of the time of authoring @@ -187,3 +215,124 @@ inline static UINT32 CombineTwoValuesIntoHash(UINT32 value1, UINT32 value2) hash = XXHash32_MixFinal(hash); return hash; } + +inline static UINT32 MixPointerIntoHash(void* ptr) +{ +#ifdef HOST_64BIT + return CombineTwoValuesIntoHash((UINT32)(UINT_PTR)ptr, (UINT32)(((UINT64)ptr) >> 32)); +#else + return MixOneValueIntoHash((UINT32)ptr); +#endif +} + + +inline static UINT32 CombineThreeValuesIntoHash(UINT32 value1, UINT32 value2, UINT32 value3) +{ + // This matches the behavior of System.HashCode.Combine(value1, value2, value3) as of the time of authoring + DWORD hash = XXHash32_MixEmptyState(); + hash += 12; + hash = XXHash32_QueueRound(hash, value1); + hash = XXHash32_QueueRound(hash, value2); + hash = XXHash32_QueueRound(hash, value3); + hash = XXHash32_MixFinal(hash); + return hash; +} + +// This is a port of the System.HashCode logic for computing a hashcode using the xxHash algorithm +// However, as this is intended to provide a stable hash, the seed value is always 0. +class xxHash +{ + const uint32_t seed = 0; + const uint32_t Prime1 = 2654435761U; + const uint32_t Prime2 = 2246822519U; + const uint32_t Prime3 = 3266489917U; + const uint32_t Prime4 = 668265263U; + const uint32_t Prime5 = 374761393U; + + uint32_t _v1 = seed + Prime1 + Prime2; + uint32_t _v2 = seed + Prime2; + uint32_t _v3 = seed; + uint32_t _v4 = seed - Prime1; + uint32_t _queue1 = 0; + uint32_t _queue2 = 0; + uint32_t _queue3 = 0; + uint32_t _length = 0; + +public: + void Add(uint32_t val) + { + // The original xxHash works as follows: + // 0. Initialize immediately. We can't do this in a struct (no + // default ctor). + // 1. 
Accumulate blocks of length 16 (4 uints) into 4 accumulators. + // 2. Accumulate remaining blocks of length 4 (1 uint) into the + // hash. + // 3. Accumulate remaining blocks of length 1 into the hash. + + // There is no need for #3 as this type only accepts ints. _queue1, + // _queue2 and _queue3 are basically a buffer so that when + // ToHashCode is called we can execute #2 correctly. + + // Storing the value of _length locally shaves of quite a few bytes + // in the resulting machine code. + uint32_t previousLength = _length++; + uint32_t position = previousLength % 4; + + // Switch can't be inlined. + + if (position == 0) + _queue1 = val; + else if (position == 1) + _queue2 = val; + else if (position == 2) + _queue3 = val; + else // position == 3 + { + _v1 = XXHash32_Round(_v1, _queue1); + _v2 = XXHash32_Round(_v2, _queue2); + _v3 = XXHash32_Round(_v3, _queue3); + _v4 = XXHash32_Round(_v4, val); + } + } + + uint32_t ToHashCode() + { + // Storing the value of _length locally shaves of quite a few bytes + // in the resulting machine code. + uint32_t length = _length; + + // position refers to the *next* queue position in this method, so + // position == 1 means that _queue1 is populated; _queue2 would have + // been populated on the next call to Add. + uint32_t position = length % 4; + + // If the length is less than 4, _v1 to _v4 don't contain anything + // yet. xxHash32 treats this differently. + + uint32_t hash = length < 4 ? XXHash32_MixEmptyState() : XXHash32_MixState(_v1, _v2, _v3, _v4); + + // _length is incremented once per Add(Int32) and is therefore 4 + // times too small (xxHash length is in bytes, not ints). + + hash += length * 4; + + // Mix what remains in the queue + + // Switch can't be inlined right now, so use as few branches as + // possible by manually excluding impossible scenarios (position > 1 + // is always false if position is not > 0). 
+ if (position > 0) + { + hash = XXHash32_QueueRound(hash, _queue1); + if (position > 1) + { + hash = XXHash32_QueueRound(hash, _queue2); + if (position > 2) + hash = XXHash32_QueueRound(hash, _queue3); + } + } + + hash = XXHash32_MixFinal(hash); + return (int)hash; + } +}; diff --git a/src/coreclr/vm/versionresilienthashcode.cpp b/src/coreclr/vm/versionresilienthashcode.cpp index 5321ae79f9a13..cdf11c2353601 100644 --- a/src/coreclr/vm/versionresilienthashcode.cpp +++ b/src/coreclr/vm/versionresilienthashcode.cpp @@ -4,6 +4,7 @@ #include "common.h" #include "versionresilienthashcode.h" #include "typehashingalgorithms.h" +#include "openum.h" bool GetVersionResilientTypeHashCode(IMDInternalImport *pMDImport, mdExportedType token, int * pdwHashCode) { @@ -146,4 +147,254 @@ int GetVersionResilientModuleHashCode(Module* pModule) { return ComputeNameHashCode(pModule->GetSimpleName()); } + +class ILInstructionParser +{ + const uint8_t *_pCode; + uint32_t _cbCode; + +public: + ILInstructionParser(const uint8_t *pCode, uint32_t cbCode) : + _pCode(pCode), _cbCode(cbCode) + {} + + bool GetByte(uint8_t *data) + { + if (_cbCode >= 1) + { + *data = *_pCode; + _cbCode--; + return true; + } + return false; + } + + bool GetUInt16(uint16_t *data) + { + if (_cbCode >= 2) + { + *data = *(uint16_t UNALIGNED*)_pCode; + _cbCode -= 2; + return true; + } + return false; + } + + bool GetUInt32(uint32_t *data) + { + if (_cbCode >= 4) + { + *data = *(uint32_t UNALIGNED*)_pCode; + _cbCode -= 4; + return true; + } + return false; + } + + bool IsEmpty() + { + return _cbCode == 0; + } +}; + +// Use the SigParser type to handle bounds checks safely +bool AddVersionResilientHashCodeForInstruction(ILInstructionParser *parser, xxHash *hash) +{ + uint16_t opcodeValue; + BYTE firstByte; + if (!parser->GetByte(&firstByte)) + { + return false; + } + if (firstByte != 0xFE) + { + opcodeValue = 0xFF00 | firstByte; + } + else + { + BYTE secondByte; + if (!parser->GetByte(&secondByte)) + { + return false; + 
} + opcodeValue = (((uint16_t)firstByte) << 8) | (uint16_t)secondByte; + } + + hash->Add(opcodeValue); + + opcode_format_t opcodeFormat; + switch (opcodeValue) + { +#define OPDEF_REAL_OPCODES_ONLY +#define OPDEF(name, stringname, stackpop, stackpush, params, kind, len, byte1, byte2, ctrl) \ + case (((uint16_t)byte1) << 8) | (uint16_t)byte2: opcodeFormat = params; break; +#include "opcode.def" +#undef OPDEF +#undef OPDEF_REAL_OPCODES_ONLY + default: _ASSERTE(false); return false; + } + + switch (opcodeFormat) + { + case InlineNone: // no inline args + return opcodeValue; + + case ShortInlineI: + case ShortInlineBrTarget: + case ShortInlineVar: // 1 byte value which is token change resilient + { + uint8_t varValue; + if (!parser->GetByte(&varValue)) + return false; + hash->Add(varValue); + break; + } + + case InlineVar: // 2 byte value which is token change resilient + { + uint16_t varValue; + if (!parser->GetUInt16(&varValue)) + return false; + hash->Add(varValue); + break; + } + case InlineI: + case InlineBrTarget: + case ShortInlineR: // 4 byte value which is token change resilient + { + uint32_t varValue; + if (!parser->GetUInt32(&varValue)) + return false; + hash->Add(varValue); + break; + } + + case InlineR: + case InlineI8: // 8 byte value which is token change resilient + { + // Handle as a pair of 4 byte values + uint32_t varValue; + uint32_t varValue2; + if (!parser->GetUInt32(&varValue)) + return false; + if (!parser->GetUInt32(&varValue2)) + return false; + hash->Add(varValue); + hash->Add(varValue2); + break; + } + + case InlineSwitch: + { + // Switch is variable length, so use a variable length hash function + uint32_t switchCount; + if (!parser->GetUInt32(&switchCount)) + return false; + + hash->Add(opcodeValue); + hash->Add(switchCount); + for (;switchCount > 0; switchCount--) + { + uint32_t switchEntry; + if (!parser->GetUInt32(&switchEntry)) + return false; + hash->Add(switchEntry); + } + break; + } + + case InlineMethod: + case InlineField: + case 
InlineType: + case InlineString: + case InlineSig: + case InlineTok: + { + // 4 byte value which is token dependent. Ignore. + uint32_t varValue; + if (!parser->GetUInt32(&varValue)) + return false; + break; + } + default: + { + // Bad code + _ASSERTE(FALSE); + return false; + } + } + + return true; +} + +bool GetVersionResilientILCodeHashCode(MethodDesc *pMD, int* hashCode, unsigned* ilSize) +{ + uint32_t maxStack; + uint32_t EHCount; + const BYTE* pILCode; + uint32_t cbILCode; + bool initLocals; + SigParser localSig; + + xxHash hashILData; + + if (pMD->IsDynamicMethod()) + { + DynamicResolver * pResolver = pMD->AsDynamicMethodDesc()->GetResolver(); + CorInfoOptions options; + pILCode = pResolver->GetCodeInfo(&cbILCode, + &maxStack, + &options, + &EHCount); + + localSig = pResolver->GetLocalSig(); + + initLocals = (options & CORINFO_OPT_INIT_LOCALS) == CORINFO_OPT_INIT_LOCALS; + } + else + { + COR_ILMETHOD_DECODER header(pMD->GetILHeader(TRUE), pMD->GetMDImport(), NULL); + + pILCode = header.GetCode(); + cbILCode = header.GetCodeSize(); + maxStack = header.GetMaxStack(); + EHCount = header.EHCount(); + initLocals = (header.GetFlags() & CorILMethod_InitLocals) == CorILMethod_InitLocals; + localSig = SigParser(header.LocalVarSig, header.cbLocalVarSig); + + for (uint32_t ehClause = 0; ehClause < EHCount; ehClause++) + { + IMAGE_COR_ILMETHOD_SECT_EH_CLAUSE_FAT ehClauseBuf; + auto ehInfo = header.EH->EHClause(ehClause, &ehClauseBuf); + + hashILData.Add(ehInfo->Flags); + hashILData.Add(ehInfo->TryOffset); + hashILData.Add(ehInfo->TryLength); + hashILData.Add(ehInfo->HandlerLength); + hashILData.Add(ehInfo->HandlerOffset); + if ((ehInfo->Flags & COR_ILEXCEPTION_CLAUSE_FILTER) == COR_ILEXCEPTION_CLAUSE_FILTER) + { + hashILData.Add(ehInfo->FilterOffset); + } + // Do not hash the classToken field, as is possibly token dependent + } + } + + hashILData.Add(maxStack); + hashILData.Add(EHCount); + + ILInstructionParser ilParser(pILCode, cbILCode); + *ilSize = cbILCode; + while 
(!ilParser.IsEmpty()) + { + if (!AddVersionResilientHashCodeForInstruction(&ilParser, &hashILData)) + return false; + } + + // TODO! Analyze if adding hash of non-token depenendent portions of local signature is useful + *hashCode = (int)hashILData.ToHashCode(); + return true; +} + + #endif // DACCESS_COMPILE diff --git a/src/coreclr/vm/versionresilienthashcode.h b/src/coreclr/vm/versionresilienthashcode.h index 9486df770f0f8..577ec32432441 100644 --- a/src/coreclr/vm/versionresilienthashcode.h +++ b/src/coreclr/vm/versionresilienthashcode.h @@ -8,3 +8,5 @@ bool GetVersionResilientTypeHashCode(IMDInternalImport *pMDImport, mdExportedTyp int GetVersionResilientMethodHashCode(MethodDesc *pMD); int GetVersionResilientModuleHashCode(Module* pModule); + +bool GetVersionResilientILCodeHashCode(MethodDesc *pMD, int* hashCode, unsigned* ilSize); diff --git a/src/coreclr/zap/zapinfo.cpp b/src/coreclr/zap/zapinfo.cpp index 557367c31c844..d49fe508fe867 100644 --- a/src/coreclr/zap/zapinfo.cpp +++ b/src/coreclr/zap/zapinfo.cpp @@ -74,6 +74,16 @@ ZapInfo::~ZapInfo() #ifdef FEATURE_EH_FUNCLETS delete [] m_pMainUnwindInfo; #endif + if (m_pgoResults != nullptr) + { + ProfileDataResults* current = m_pgoResults; + while (current != nullptr) + { + ProfileDataResults* next = current->m_next; + delete current; + current = next; + } + } } #ifdef ALLOW_SXS_JIT_NGEN @@ -901,26 +911,32 @@ bool ZapInfo::runWithErrorTrap(void (*function)(void*), void* param) return m_pEEJitInfo->runWithErrorTrap(function, param); } -HRESULT ZapInfo::allocMethodBlockCounts ( - UINT32 count, // the count of tuples - ICorJitInfo::BlockCounts ** pBlockCounts // pointer to array of tuples - ) +HRESULT ZapInfo::allocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema* pSchema, + UINT32 countSchemaItems, + BYTE** pInstrumentationData) { HRESULT hr; + *pInstrumentationData = nullptr; if (m_zapper->m_pOpt->m_compilerFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_IL_STUB)) { - *pBlockCounts = 
nullptr; + return E_NOTIMPL; + } + + // Only allocation of PGO data for the current method is supported. + if (m_currentMethodHandle != ftnHnd) + { return E_NOTIMPL; } // @TODO: support generic methods from other assemblies if (m_currentMethodModule != m_pImage->m_hModule) { - *pBlockCounts = nullptr; return E_NOTIMPL; } + mdMethodDef md = m_currentMethodToken; if (IsNilToken(md)) @@ -941,44 +957,60 @@ HRESULT ZapInfo::allocMethodBlockCounts ( return E_FAIL; } + // Validate that each schema item is only used for a basic block count + for (UINT32 iSchema = 0; iSchema < countSchemaItems; iSchema++) + { + if (pSchema[iSchema].InstrumentationKind != PgoInstrumentationKind::BasicBlockIntCount) + return E_NOTIMPL; + if (pSchema[iSchema].Count != 1) + return E_NOTIMPL; + } + // If the JIT retries the compilation (especially during JIT stress), it can // try to allocate the profiling data multiple times. We will just keep track // of the latest copy in this case. // _ASSERTE(m_pProfileData == NULL); - DWORD totalSize = (DWORD) (count * sizeof(ICorJitInfo::BlockCounts)) + sizeof(CORBBTPROF_METHOD_HEADER); + DWORD totalSize = (DWORD) (countSchemaItems * sizeof(ICorJitInfo::BlockCounts)) + sizeof(CORBBTPROF_METHOD_HEADER); m_pProfileData = ZapBlobWithRelocs::NewAlignedBlob(m_pImage, NULL, totalSize, sizeof(DWORD)); CORBBTPROF_METHOD_HEADER * profileData = (CORBBTPROF_METHOD_HEADER *) m_pProfileData->GetData(); profileData->size = totalSize; profileData->cDetail = 0; profileData->method.token = md; profileData->method.ILSize = m_currentMethodInfo.ILCodeSize; - profileData->method.cBlock = count; + profileData->method.cBlock = countSchemaItems; + + ICorJitInfo::BlockCounts* blockCounts = (ICorJitInfo::BlockCounts *)(&profileData->method.block[0]); + *pInstrumentationData = (BYTE*)blockCounts; - *pBlockCounts = (ICorJitInfo::BlockCounts *)(&profileData->method.block[0]); + for (UINT32 iSchema = 0; iSchema < countSchemaItems; iSchema++) + { + // Update schema have correct offsets 
+ pSchema[iSchema].Offset = (BYTE*)&blockCounts[iSchema].ExecutionCount - (BYTE*)blockCounts; + // Insert IL Offsets into block data to match schema + blockCounts[iSchema].ILOffset = pSchema[iSchema].ILOffset; + } return S_OK; } -HRESULT ZapInfo::getMethodBlockCounts ( - CORINFO_METHOD_HANDLE ftnHnd, - UINT32 * pCount, // pointer to the count of tuples - BlockCounts ** pBlockCounts, // pointer to array of tuples - UINT32 * pNumRuns - ) +HRESULT ZapInfo::getPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, + PgoInstrumentationSchema **pSchema, // pointer to the schema table which describes the instrumentation results (pointer will not remain valid after jit completes) + UINT32 * pCountSchemaItems, // pointer to the count schema items + BYTE ** pInstrumentationData) // pointer to the actual instrumentation data (pointer will not remain valid after jit completes) { - _ASSERTE(pBlockCounts != nullptr); - _ASSERTE(pCount != nullptr); + _ASSERTE(pCountSchemaItems != nullptr); + _ASSERTE(pInstrumentationData != nullptr); + _ASSERTE(pSchema != nullptr); HRESULT hr; // Initialize outputs in case we return E_FAIL - *pBlockCounts = nullptr; - *pCount = 0; - if (pNumRuns != nullptr) - { - *pNumRuns = 0; - } + *pCountSchemaItems = 0; + *pSchema = nullptr; + *pInstrumentationData = nullptr; + + int32_t numRuns = 0; // For generic instantiations whose IL is in another module, // the profile data is in that module @@ -1004,71 +1036,102 @@ HRESULT ZapInfo::getMethodBlockCounts ( return E_FAIL; } - if (pNumRuns != nullptr) + ProfileDataResults* pgoResults = m_pgoResults; + while (pgoResults != nullptr) { - *pNumRuns = m_pImage->m_profileDataNumRuns; + if (pgoResults->m_ftn == ftnHnd) + break; + pgoResults = pgoResults->m_next; } - const ZapImage::ProfileDataHashEntry * foundEntry = m_pImage->profileDataHashTable.LookupPtr(md); - - if (foundEntry == NULL) + if (pgoResults == nullptr) { - return E_FAIL; - } + const ZapImage::ProfileDataHashEntry * foundEntry = 
m_pImage->profileDataHashTable.LookupPtr(md); - // The md must match. - _ASSERTE(foundEntry->md == md); + if (foundEntry == NULL) + { + return E_FAIL; + } - if (foundEntry->pos == 0) - { - // We might not have profile data and instead only have CompileStatus and flags - assert(foundEntry->size == 0); - return E_FAIL; - } + // The md must match. + _ASSERTE(foundEntry->md == md); - // - // - // We found the md. Let's retrieve the profile data. - // - _ASSERTE(foundEntry->size >= sizeof(CORBBTPROF_METHOD_HEADER)); // The size must at least this + if (foundEntry->pos == 0) + { + // We might not have profile data and instead only have CompileStatus and flags + assert(foundEntry->size == 0); + return E_FAIL; + } - ProfileReader profileReader(DataSection_MethodBlockCounts->pData, DataSection_MethodBlockCounts->dataSize); + // + // + // We found the md. Let's retrieve the profile data. + // + _ASSERTE(foundEntry->size >= sizeof(CORBBTPROF_METHOD_HEADER)); // The size must at least this - // Locate the method in interest. - SEEK(foundEntry->pos); - CORBBTPROF_METHOD_HEADER * profileData; - READ_SIZE(profileData, CORBBTPROF_METHOD_HEADER, foundEntry->size); - _ASSERTE(profileData->method.token == foundEntry->md); // We should be looking at the right method - _ASSERTE(profileData->size == foundEntry->size); // and the cached size must match + ProfileReader profileReader(DataSection_MethodBlockCounts->pData, DataSection_MethodBlockCounts->dataSize); - *pBlockCounts = (ICorJitInfo::BlockCounts *) &profileData->method.block[0]; - *pCount = profileData->method.cBlock; + // Locate the method in interest. 
+ SEEK(foundEntry->pos); + CORBBTPROF_METHOD_HEADER * profileData; + READ_SIZE(profileData, CORBBTPROF_METHOD_HEADER, foundEntry->size); + _ASSERTE(profileData->method.token == foundEntry->md); // We should be looking at the right method + _ASSERTE(profileData->size == foundEntry->size); // and the cached size must match - // Find method's IL size - // - unsigned ilSize = m_currentMethodInfo.ILCodeSize; + // Find method's IL size + // + unsigned ilSize = m_currentMethodInfo.ILCodeSize; - if (ftnHnd != m_currentMethodHandle) - { - CORINFO_METHOD_INFO methodInfo; - if (!getMethodInfo(ftnHnd, &methodInfo)) + if (ftnHnd != m_currentMethodHandle) { - return E_FAIL; + CORINFO_METHOD_INFO methodInfo; + if (!getMethodInfo(ftnHnd, &methodInfo)) + { + return E_FAIL; + } + ilSize = methodInfo.ILCodeSize; } - ilSize = methodInfo.ILCodeSize; - } - // If the ILSize is non-zero the the ILCodeSize also must match - // - if ((profileData->method.ILSize != 0) && (profileData->method.ILSize != ilSize)) - { - // IL code for this method does not match the IL code for the method when it was profiled - // in such cases we tell the JIT to discard the profile data by returning E_FAIL + // If the ILSize is non-zero the the ILCodeSize also must match // - return E_FAIL; + if ((profileData->method.ILSize != 0) && (profileData->method.ILSize != ilSize)) + { + // IL code for this method does not match the IL code for the method when it was profiled + // in such cases we tell the JIT to discard the profile data by returning E_FAIL + // + return E_FAIL; + } + + pgoResults = new ProfileDataResults(ftnHnd); + pgoResults->m_next = m_pgoResults; + m_pgoResults = pgoResults; + + pgoResults->pInstrumentationData = (BYTE*)&profileData->method.block[0]; + + ICorJitInfo::BlockCounts *blockCounts = (ICorJitInfo::BlockCounts *) &profileData->method.block[0]; + + PgoInstrumentationSchema numRunsSchema = {}; + numRunsSchema.Count = 1; + numRunsSchema.Other = m_pImage->m_profileDataNumRuns; + 
numRunsSchema.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::NumRuns; + pgoResults->m_schema.Append(numRunsSchema); + for (UINT32 iSchema = 0; iSchema < profileData->method.cBlock; iSchema++) + { + PgoInstrumentationSchema blockCountSchema = {}; + blockCountSchema.Count = 1; + blockCountSchema.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount; + blockCountSchema.ILOffset = blockCounts[iSchema].ILOffset; + blockCountSchema.Offset = (BYTE *)&blockCounts[iSchema].ExecutionCount - (BYTE*)blockCounts; + pgoResults->m_schema.Append(blockCountSchema); + } + pgoResults->m_hr = S_OK; } - return S_OK; + *pCountSchemaItems = pgoResults->m_schema.GetCount(); + *pSchema = pgoResults->m_schema.GetElements(); + *pInstrumentationData = pgoResults->pInstrumentationData; + + return pgoResults->m_hr; } CORINFO_CLASS_HANDLE ZapInfo::getLikelyClass( @@ -4207,7 +4270,8 @@ BOOL ZapInfo::CurrentMethodHasProfileData() { WRAPPER_NO_CONTRACT; UINT32 size; - ICorJitInfo::BlockCounts * pBlockCounts; - return SUCCEEDED(getMethodBlockCounts(m_currentMethodHandle, &size, &pBlockCounts, NULL)); + ICorJitInfo::PgoInstrumentationSchema * pSchema; + BYTE* pData; + return SUCCEEDED(getPgoInstrumentationResults(m_currentMethodHandle, &pSchema, &size, &pData)); } diff --git a/src/coreclr/zap/zapinfo.h b/src/coreclr/zap/zapinfo.h index bf3ddf0fc366b..3a72d2f46a5d2 100644 --- a/src/coreclr/zap/zapinfo.h +++ b/src/coreclr/zap/zapinfo.h @@ -250,6 +250,17 @@ class ZapInfo CORINFO_ACCESS_FLAGS accessFlags, BOOL fAllowThunk); + struct ProfileDataResults + { + ProfileDataResults(CORINFO_METHOD_HANDLE ftn) : m_ftn(ftn) {} + ProfileDataResults* m_next = nullptr; + CORINFO_METHOD_HANDLE m_ftn; + SArray m_schema; + BYTE *pInstrumentationData = nullptr; + HRESULT m_hr = E_FAIL; + }; + ProfileDataResults *m_pgoResults = nullptr; + public: ZapInfo(ZapImage * pImage, mdMethodDef md, CORINFO_METHOD_HANDLE handle, CORINFO_MODULE_HANDLE module, unsigned methodProfilingDataFlags); 
~ZapInfo();