From 99e58d55c1f02e242f9cbe298d4f31b1a1563207 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Tue, 21 Jun 2022 10:13:43 -0700 Subject: [PATCH] Fix SIMD data overallocation (#71043) The JIT was allocating 16 bytes for TYP_SIMD8 static data loads, and marking the data section as requiring 16 byte alignment as well. Fix this to not allocate more than necessary, or over align. --- src/coreclr/jit/emit.cpp | 18 +++++++++++++++--- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/lower.cpp | 11 ++++++++--- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index f68ce116bb8f4..f3bd2423813ba 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -7275,7 +7275,12 @@ UNATIVE_OFFSET emitter::emitDataGenBeg(unsigned size, unsigned alignment, var_ty } assert((secOffs % alignment) == 0); - emitConsDsc.alignment = max(emitConsDsc.alignment, alignment); + if (emitConsDsc.alignment < alignment) + { + JITDUMP("Increasing data section alignment from %u to %u for type %s\n", emitConsDsc.alignment, alignment, + varTypeName(dataType)); + emitConsDsc.alignment = alignment; + } /* Advance the current offset */ emitConsDsc.dsdOffs += size; @@ -7673,7 +7678,7 @@ void emitter::emitOutputDataSec(dataSecDsc* sec, BYTE* dst) if (emitComp->opts.disAsm) { - emitDispDataSec(sec); + emitDispDataSec(sec, dst); } unsigned secNum = 0; @@ -7794,13 +7799,14 @@ void emitter::emitOutputDataSec(dataSecDsc* sec, BYTE* dst) // // Arguments: // section - the data section description +// dst - address of the data section // // Notes: // The output format attempts to mirror typical assembler syntax. // Data section entries lack type information so float/double entries // are displayed as if they are integers/longs. // -void emitter::emitDispDataSec(dataSecDsc* section) +void emitter::emitDispDataSec(dataSecDsc* section, BYTE* dst) { printf("\n"); @@ -7808,11 +7814,17 @@ void emitter::emitDispDataSec(dataSecDsc* section) for (dataSection* data = section->dsdList; data != nullptr; data = data->dsNext) { + if (emitComp->opts.disAddr) + { + printf("; @" FMT_ADDR "\n", DBG_ADDR(dst)); + } + const char* labelFormat = "%-7s"; char label[64]; sprintf_s(label, ArrLen(label), "RWD%02u", offset); printf(labelFormat, label); offset += data->dsSize; + dst += data->dsSize; if ((data->dsType == dataSection::blockRelative32) || (data->dsType == dataSection::blockAbsoluteAddr)) { diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 042b50e7e37c5..053381eb8f027 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2555,7 +2555,7 @@ class emitter void emitOutputDataSec(dataSecDsc* sec, BYTE* dst); #ifdef DEBUG - void emitDispDataSec(dataSecDsc* section); + void emitDispDataSec(dataSecDsc* section, BYTE* dst); #endif /************************************************************************/ diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index f5ddb4069ccc2..0450beb2b2607 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7384,6 +7384,7 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) if (arg->IsCnsFltOrDbl()) { + noway_assert(constArgCount < ArrLen(constArgValues)); constArgValues[constArgCount] = static_cast(arg->AsDblCon()->gtDconVal); constArgCount++; } @@ -7396,10 +7397,14 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) BlockRange().Remove(arg); } - assert(sizeof(constArgValues) == 16); + // For SIMD12, even though there might be 12 bytes of constants, we need to store 16 bytes of data + // since we've bashed the node the TYP_SIMD16 and do a 16-byte indirection. + assert(varTypeIsSIMD(simdNode)); + const unsigned cnsSize = genTypeSize(simdNode); + assert(cnsSize <= sizeof(constArgValues)); - unsigned cnsSize = sizeof(constArgValues); - unsigned cnsAlign = (comp->compCodeOpt() != Compiler::SMALL_CODE) ? cnsSize : 1; + const unsigned cnsAlign = + (comp->compCodeOpt() != Compiler::SMALL_CODE) ? cnsSize : emitter::dataSection::MIN_DATA_ALIGN; CORINFO_FIELD_HANDLE hnd = comp->GetEmitter()->emitBlkConst(constArgValues, cnsSize, cnsAlign, simdNode->GetSimdBaseType());