diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 75583e90dda9f..1f0ede80c7a8a 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -937,9 +937,20 @@ void CodeGen::genSpillVar(GenTree* tree) // but we will kill the var in the reg (below). if (!varDsc->IsAlwaysAliveInMemory()) { - instruction storeIns = ins_Store(lclType, compiler->isSIMDTypeLocalAligned(varNum)); assert(varDsc->GetRegNum() == tree->GetRegNum()); - inst_TT_RV(storeIns, size, tree, tree->GetRegNum()); +#if defined(FEATURE_SIMD) + if (lclType == TYP_SIMD12) + { + // Store SIMD12 to stack as 12 bytes + GetEmitter()->emitStoreSimd12ToLclOffset(varNum, tree->AsLclVarCommon()->GetLclOffs(), + tree->GetRegNum(), nullptr); + } + else +#endif + { + instruction storeIns = ins_Store(lclType, compiler->isSIMDTypeLocalAligned(varNum)); + inst_TT_RV(storeIns, size, tree, tree->GetRegNum()); + } } // We should only have both GTF_SPILL (i.e. the flag causing this method to be called) and diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index ff88d9fe29c98..8dfca521a8117 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17064,12 +17064,28 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu // store lower 8 bytes emitIns_S_R(INS_str, EA_8BYTE, dataReg, varNum, offset); - // Extract upper 4-bytes from data - regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider); - emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2); + if (codeGen->internalRegisters.Count(tmpRegProvider) == 0) + { + // We don't have temp regs - let's do two shuffles then + + // [0,1,2,3] -> [2,3,0,1] + emitIns_R_R_R_I(INS_ext, EA_16BYTE, dataReg, dataReg, dataReg, 8, INS_OPTS_16B); + + // store lower 4 bytes + emitIns_S_R(INS_str, EA_4BYTE, dataReg, varNum, offset + 8); + + // Restore dataReg to its previous state: [2,3,0,1] -> [0,1,2,3] + emitIns_R_R_R_I(INS_ext, EA_16BYTE, dataReg, dataReg, dataReg, 8, INS_OPTS_16B); + } + else + { + // Extract upper 4-bytes from data + regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider); + emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2); - // 4-byte write - emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8); + // 4-byte write + emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8); + } } #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 2cc3f6bbaa8f6..8977065588e35 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -5620,7 +5620,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) // varNum - the variable on the stack to use as a base; // offset - the offset from the varNum; // dataReg - the src reg with SIMD12 value; -// tmpRegProvider - a tree to grab a tmp reg from if needed. +// tmpRegProvider - a tree to grab a tmp reg from if needed (can be null). // void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider) { @@ -5635,7 +5635,7 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu // Extract and store upper 4 bytes emitIns_S_R_I(INS_extractps, EA_16BYTE, varNum, offset + 8, dataReg, 2); } - else + else if (tmpRegProvider != nullptr) { regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider); assert(isFloatReg(tmpReg)); @@ -5646,6 +5646,19 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu // Store upper 4 bytes emitIns_S_R(INS_movss, EA_4BYTE, tmpReg, varNum, offset + 8); } + else + { + // We don't have temp regs - let's do two shuffles then + + // [0,1,2,3] -> [2,3,0,1] + emitIns_R_R_I(INS_pshufd, EA_16BYTE, dataReg, dataReg, 78); + + // Store upper 4 bytes + emitIns_S_R(INS_movss, EA_4BYTE, dataReg, varNum, offset + 8); + + // Restore dataReg to its previous state: [2,3,0,1] -> [0,1,2,3] + emitIns_R_R_I(INS_pshufd, EA_16BYTE, dataReg, dataReg, 78); + } } #endif // FEATURE_SIMD diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index eb9817b560e97..816de96735f1d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -8596,7 +8596,15 @@ void LinearScan::insertMove( // This var can't be marked lvRegister now varDsc->SetRegNum(REG_STK); - GenTree* src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet()); + var_types typ = varDsc->TypeGet(); +#if defined(FEATURE_SIMD) + if ((typ == TYP_SIMD12) && compiler->lvaMapSimd12ToSimd16(varDsc)) + { + typ = TYP_SIMD16; + } +#endif + + GenTree* src = compiler->gtNewLclvNode(lclNum, typ); SetLsraAdded(src); // There are three cases we need to handle: diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs b/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs new file mode 100644 index 0000000000000..5c2a5c890165d --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using Xunit; + +public unsafe class Runtime_95043 +{ + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Sweep(int ecx, int* edx, Vector3 stack12) + { + for (int i = 0; i < 5; i++) + { + Console.WriteLine(ecx); + if (ecx == -1) + { + ecx = edx[0]; + } + else if (!float.IsNaN(stack12.X)) + { + edx[0] = -1; + ecx = -1; + } + } + } + + [Fact] + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Test() + { + int x = 42; + Sweep(1, &x, Vector3.Zero); + Assert.Equal(-1, x); + } +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.csproj new file mode 100644 index 0000000000000..1981001110bbe --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.csproj @@ -0,0 +1,9 @@ + + + True + true + + + + +