Skip to content

Commit

Permalink
Fix SIMD12 spills (#102736)
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo authored Jun 13, 2024
1 parent 7e2e874 commit 3f9a441
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 10 deletions.
15 changes: 13 additions & 2 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -937,9 +937,20 @@ void CodeGen::genSpillVar(GenTree* tree)
// but we will kill the var in the reg (below).
if (!varDsc->IsAlwaysAliveInMemory())
{
instruction storeIns = ins_Store(lclType, compiler->isSIMDTypeLocalAligned(varNum));
assert(varDsc->GetRegNum() == tree->GetRegNum());
inst_TT_RV(storeIns, size, tree, tree->GetRegNum());
#if defined(FEATURE_SIMD)
if (lclType == TYP_SIMD12)
{
// Store SIMD12 to stack as 12 bytes
GetEmitter()->emitStoreSimd12ToLclOffset(varNum, tree->AsLclVarCommon()->GetLclOffs(),
tree->GetRegNum(), nullptr);
}
else
#endif
{
instruction storeIns = ins_Store(lclType, compiler->isSIMDTypeLocalAligned(varNum));
inst_TT_RV(storeIns, size, tree, tree->GetRegNum());
}
}

// We should only have both GTF_SPILL (i.e. the flag causing this method to be called) and
Expand Down
26 changes: 21 additions & 5 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17064,12 +17064,28 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu
// store lower 8 bytes
emitIns_S_R(INS_str, EA_8BYTE, dataReg, varNum, offset);

// Extract upper 4-bytes from data
regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider);
emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);
if (codeGen->internalRegisters.Count(tmpRegProvider) == 0)
{
// We don't have temp regs - let's do two shuffles then

// [0,1,2,3] -> [2,3,0,1]
emitIns_R_R_R_I(INS_ext, EA_16BYTE, dataReg, dataReg, dataReg, 8, INS_OPTS_16B);

// store lower 4 bytes
emitIns_S_R(INS_str, EA_4BYTE, dataReg, varNum, offset + 8);

// Restore dataReg to its previous state: [2,3,0,1] -> [0,1,2,3]
emitIns_R_R_R_I(INS_ext, EA_16BYTE, dataReg, dataReg, dataReg, 8, INS_OPTS_16B);
}
else
{
// Extract upper 4-bytes from data
regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider);
emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);

// 4-byte write
emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8);
// 4-byte write
emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8);
}
}
#endif // FEATURE_SIMD

Expand Down
17 changes: 15 additions & 2 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5620,7 +5620,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
// varNum - the variable on the stack to use as a base;
// offset - the offset from the varNum;
// dataReg - the src reg with SIMD12 value;
// tmpRegProvider - a tree to grab a tmp reg from if needed.
// tmpRegProvider - a tree to grab a tmp reg from if needed (can be null).
//
void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider)
{
Expand All @@ -5635,7 +5635,7 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu
// Extract and store upper 4 bytes
emitIns_S_R_I(INS_extractps, EA_16BYTE, varNum, offset + 8, dataReg, 2);
}
else
else if (tmpRegProvider != nullptr)
{
regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider);
assert(isFloatReg(tmpReg));
Expand All @@ -5646,6 +5646,19 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu
// Store upper 4 bytes
emitIns_S_R(INS_movss, EA_4BYTE, tmpReg, varNum, offset + 8);
}
else
{
// We don't have temp regs - let's do two shuffles then

// [0,1,2,3] -> [2,3,0,1]
emitIns_R_R_I(INS_pshufd, EA_16BYTE, dataReg, dataReg, 78);

// Store upper 4 bytes
emitIns_S_R(INS_movss, EA_4BYTE, dataReg, varNum, offset + 8);

// Restore dataReg to its previous state: [2,3,0,1] -> [0,1,2,3]
emitIns_R_R_I(INS_pshufd, EA_16BYTE, dataReg, dataReg, 78);
}
}
#endif // FEATURE_SIMD

Expand Down
10 changes: 9 additions & 1 deletion src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8596,7 +8596,15 @@ void LinearScan::insertMove(
// This var can't be marked lvRegister now
varDsc->SetRegNum(REG_STK);

GenTree* src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
var_types typ = varDsc->TypeGet();
#if defined(FEATURE_SIMD)
if ((typ == TYP_SIMD12) && compiler->lvaMapSimd12ToSimd16(varDsc))
{
typ = TYP_SIMD16;
}
#endif

GenTree* src = compiler->gtNewLclvNode(lclNum, typ);
SetLsraAdded(src);

// There are three cases we need to handle:
Expand Down
37 changes: 37 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using Xunit;

public unsafe class Runtime_95043
{
[MethodImpl(MethodImplOptions.NoInlining)]
private static void Sweep(int ecx, int* edx, Vector3 stack12)
{
for (int i = 0; i < 5; i++)
{
Console.WriteLine(ecx);
if (ecx == -1)
{
ecx = edx[0];
}
else if (!float.IsNaN(stack12.X))
{
edx[0] = -1;
ecx = -1;
}
}
}

[Fact]
[MethodImpl(MethodImplOptions.NoInlining)]
public static void Test()
{
int x = 42;
Sweep(1, &x, Vector3.Zero);
Assert.Equal(-1, x);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Optimize>True</Optimize>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>

0 comments on commit 3f9a441

Please sign in to comment.