Skip to content

Commit

Permalink
Delete mov_i2xmm and mov_xmm2i. (#47843)
Browse files Browse the repository at this point in the history
* Delete mov_i2xmm and mov_xmm2i.

* Delete `ins_CopyFloatToInt`.

* Delete 'ins_CopyIntToFloat'.

* review feedback
  • Loading branch information
Sergey Andreenko authored Feb 9, 2021
1 parent bf9f899 commit 5b0005a
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 264 deletions.
2 changes: 0 additions & 2 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1450,8 +1450,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

instruction ins_Copy(var_types dstType);
instruction ins_Copy(regNumber srcReg, var_types dstType);
instruction ins_CopyIntToFloat(var_types srcType, var_types dstTyp);
instruction ins_CopyFloatToInt(var_types srcType, var_types dstTyp);
static instruction ins_FloatStore(var_types type = TYP_DOUBLE);
static instruction ins_FloatCopy(var_types type = TYP_DOUBLE);
instruction ins_FloatConv(var_types to, var_types from);
Expand Down
47 changes: 5 additions & 42 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4437,17 +4437,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
}
#endif
instruction copyIns = ins_Copy(regNum, destMemType);
#if defined(TARGET_XARCH)
// For INS_mov_xmm2i, the source xmm reg comes first.
if (copyIns == INS_mov_xmm2i)
{
GetEmitter()->emitIns_R_R(copyIns, size, regNum, destRegNum);
}
else
#endif // TARGET_XARCH
{
GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum);
}
GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum);
#ifdef USING_SCOPE_INFO
psiMoveToReg(varNum);
#endif // USING_SCOPE_INFO
Expand Down Expand Up @@ -12067,42 +12057,15 @@ void CodeGen::genRegCopy(GenTree* treeNode)
}
return;
}

regNumber srcReg = genConsumeReg(op1);
var_types targetType = treeNode->TypeGet();
regNumber targetReg = treeNode->GetRegNum();
assert(srcReg != REG_NA);
assert(targetReg != REG_NA);
assert(targetType != TYP_STRUCT);

// Check whether this node and the node from which we're copying the value have
// different register types. This can happen if (currently iff) we have a SIMD
// vector type that fits in an integer register, in which case it is passed as
// an argument, or returned from a call, in an integer register and must be
// copied if it's in an xmm register.

bool srcFltReg = (varTypeUsesFloatReg(op1));
bool tgtFltReg = (varTypeUsesFloatReg(treeNode));
if (srcFltReg != tgtFltReg)
{
instruction ins;
regNumber fpReg;
regNumber intReg;
if (tgtFltReg)
{
ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
fpReg = targetReg;
intReg = op1->GetRegNum();
}
else
{
ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
intReg = targetReg;
fpReg = op1->GetRegNum();
}
inst_RV_RV(ins, fpReg, intReg, targetType);
}
else
{
inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
}
inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType);

if (op1->IsLocal())
{
Expand Down
45 changes: 14 additions & 31 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2743,7 +2743,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
}
else
{
emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, srcXmmReg, srcIntReg);
emit->emitIns_R_R(INS_movd, EA_PTRSIZE, srcXmmReg, srcIntReg);
emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg);
#ifdef TARGET_X86
// For x86, we need one more to convert it from 8 bytes to 16 bytes.
Expand Down Expand Up @@ -5039,9 +5039,9 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
// integer and floating point registers so, let's do that.
if (call->IsVarargs() && varTypeIsFloating(argNode))
{
regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum());
instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
inst_RV_RV(ins, argNode->GetRegNum(), targetReg);
regNumber srcReg = argNode->GetRegNum();
regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum());
inst_RV_RV(ins_Copy(srcReg, TYP_LONG), targetReg, srcReg);
}
#endif // FEATURE_VARARG
}
Expand Down Expand Up @@ -5783,9 +5783,8 @@ void CodeGen::genJmpMethod(GenTree* jmp)

if (varTypeIsFloating(loadType))
{
intArgReg = compiler->getCallArgIntRegister(argReg);
instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
inst_RV_RV(ins, argReg, intArgReg, loadType);
intArgReg = compiler->getCallArgIntRegister(argReg);
inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType);
}
else
{
Expand Down Expand Up @@ -5824,7 +5823,6 @@ void CodeGen::genJmpMethod(GenTree* jmp)
regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
if (remainingIntArgMask != RBM_NONE)
{
instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
GetEmitter()->emitDisableGC();
for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
{
Expand All @@ -5838,7 +5836,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)

// also load it in corresponding float arg reg
regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
inst_RV_RV(ins_Copy(argReg, TYP_DOUBLE), floatReg, argReg);
}

argOffset += REGSIZE_BYTES;
Expand Down Expand Up @@ -6591,8 +6589,9 @@ void CodeGen::genCkfinite(GenTree* treeNode)
// Copy the floating-point value to an integer register. If we copied a float to a long, then
// right-shift the value so the high 32 bits of the floating-point value sit in the low 32
// bits of the integer register.
instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
inst_RV_RV(ins, op1->GetRegNum(), tmpReg, targetType);
regNumber srcReg = op1->GetRegNum();
var_types targetIntType = ((targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
inst_RV_RV(ins_Copy(srcReg, targetIntType), tmpReg, srcReg, targetType);
if (targetType == TYP_DOUBLE)
{
// right shift by 32 bits to get to exponent.
Expand Down Expand Up @@ -6661,7 +6660,7 @@ void CodeGen::genCkfinite(GenTree* treeNode)

// Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
// value, no matter the floating-point type.
inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
inst_RV_RV(ins_Copy(copyToTmpSrcReg, TYP_INT), tmpReg, copyToTmpSrcReg, TYP_FLOAT);

// Mask exponent with all 1's and check if the exponent is all 1's
inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
Expand Down Expand Up @@ -7082,22 +7081,7 @@ void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types sr
assert(dstFltReg == genIsValidFloatReg(targetReg));
if (srcFltReg != dstFltReg)
{
instruction ins;
regNumber fltReg;
regNumber intReg;
if (dstFltReg)
{
ins = ins_CopyIntToFloat(srcType, targetType);
fltReg = targetReg;
intReg = srcReg;
}
else
{
ins = ins_CopyFloatToInt(srcType, targetType);
intReg = targetReg;
fltReg = srcReg;
}
inst_RV_RV(ins, fltReg, intReg, targetType);
inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType);
}
else if (targetReg != srcReg)
{
Expand Down Expand Up @@ -8760,9 +8744,8 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
#if FEATURE_VARARG
if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
{
regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
inst_RV_RV(ins, argReg, intArgReg, loadType);
regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType);
}
#endif // FEATURE_VARARG
}
Expand Down
96 changes: 48 additions & 48 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
{
switch (ins)
{
case INS_movd: // TODO-Cleanup: replace with movq, https://github.com/dotnet/runtime/issues/47943.
case INS_andn:
case INS_bextr:
case INS_blsi:
Expand All @@ -518,8 +519,6 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
case INS_cvtss2si:
case INS_cvtsi2sd:
case INS_cvtsi2ss:
case INS_mov_xmm2i:
case INS_mov_i2xmm:
case INS_movnti:
case INS_mulx:
case INS_pdep:
Expand Down Expand Up @@ -1239,7 +1238,7 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
case INS_cvtsd2si:
case INS_cvtss2si:
case INS_extractps:
case INS_mov_xmm2i:
case INS_movd:
case INS_movmskpd:
case INS_movmskps:
case INS_mulx:
Expand Down Expand Up @@ -8837,15 +8836,7 @@ void emitter::emitDispIns(
case IF_RRD_RRD:
case IF_RWR_RRD:
case IF_RRW_RRD:
if (ins == INS_mov_i2xmm)
{
printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
}
else if (ins == INS_mov_xmm2i)
{
printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
}
else if (ins == INS_pmovmskb)
if (ins == INS_pmovmskb)
{
printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
}
Expand Down Expand Up @@ -11447,11 +11438,19 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
regNumber reg2 = id->idReg2();
emitAttr size = id->idOpSize();

// Get the 'base' opcode
code = insCodeRM(ins);
code = AddVexPrefixIfNeeded(ins, code, size);
if (IsSSEOrAVXInstruction(ins))
{
assert((ins != INS_movd) || (isFloatReg(reg1) != isFloatReg(reg2)));

if ((ins != INS_movd) || isFloatReg(reg1))
{
code = insCodeRM(ins);
}
else
{
code = insCodeMR(ins);
}
code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code);

if (TakesRexWPrefix(ins, size))
Expand All @@ -11461,6 +11460,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
{
assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
code = insCodeRM(ins);
code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
#ifdef TARGET_AMD64

Expand All @@ -11472,6 +11474,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
else if (ins == INS_movsxd)
{
assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
code = insCodeRM(ins);
code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code);

#endif // TARGET_AMD64
Expand All @@ -11480,6 +11485,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
else if ((ins == INS_bsf) || (ins == INS_bsr) || (ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) ||
(ins == INS_tzcnt))
{
assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
code = insCodeRM(ins);
code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code);
if ((ins == INS_crc32) && (size > EA_1BYTE))
{
Expand All @@ -11499,7 +11507,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
#endif // FEATURE_HW_INTRINSICS
else
{
code = insEncodeMRreg(ins, insCodeMR(ins));
assert(!TakesVexPrefix(ins));
code = insCodeMR(ins);
code = insEncodeMRreg(ins, code);

if (ins != INS_test)
{
Expand Down Expand Up @@ -11543,17 +11553,27 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
}

regNumber reg345 = REG_NA;
regNumber regFor012Bits = reg2;
regNumber regFor345Bits = REG_NA;
if (IsBMIInstruction(ins))
{
reg345 = getBmiRegNumber(ins);
regFor345Bits = getBmiRegNumber(ins);
}
if (regFor345Bits == REG_NA)
{
regFor345Bits = reg1;
}
if (reg345 == REG_NA)
if (ins == INS_movd)
{
reg345 = id->idReg1();
assert(isFloatReg(reg1) != isFloatReg(reg2));
if (isFloatReg(reg2))
{
std::swap(regFor012Bits, regFor345Bits);
}
}
unsigned regCode = insEncodeReg345(ins, reg345, size, &code);
regCode |= insEncodeReg012(ins, reg2, size, &code);

unsigned regCode = insEncodeReg345(ins, regFor345Bits, size, &code);
regCode |= insEncodeReg012(ins, regFor012Bits, size, &code);

if (TakesVexPrefix(ins))
{
Expand Down Expand Up @@ -11648,7 +11668,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
}

emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
emitGCregLiveUpd(id->idGCref(), reg1, dst);
break;

case IF_RRW_RRD:
Expand All @@ -11668,13 +11688,13 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)

*/
case INS_xor:
assert(id->idReg1() == id->idReg2());
emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
assert(reg1 == reg2);
emitGCregLiveUpd(id->idGCref(), reg1, dst);
break;

case INS_or:
case INS_and:
emitGCregDeadUpd(id->idReg1(), dst);
emitGCregDeadUpd(reg1, dst);
break;

case INS_add:
Expand All @@ -11691,7 +11711,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
#endif
// Mark r1 as holding a byref
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
emitGCregLiveUpd(GCT_BYREF, reg1, dst);
break;

default:
Expand Down Expand Up @@ -11773,15 +11793,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
case IF_RWR_RRD:
case IF_RRW_RRD:
case IF_RWR_RRD_RRD:
// INS_movxmm2i writes to reg2.
if (ins == INS_mov_xmm2i)
{
emitGCregDeadUpd(id->idReg2(), dst);
}
else
{
emitGCregDeadUpd(id->idReg1(), dst);
}
emitGCregDeadUpd(reg1, dst);
break;

default:
Expand Down Expand Up @@ -14681,18 +14693,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insThroughput = PERFSCORE_THROUGHPUT_25C;
break;

case INS_mov_xmm2i:
// movd reg, xmm
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case INS_mov_i2xmm:
// movd xmm, reg
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_1C;
break;

case INS_movd:
if (memAccessKind == PERFSCORE_MEMORY_NONE)
{
Expand Down
Loading

0 comments on commit 5b0005a

Please sign in to comment.