Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable AVX512 embedded masking for most other intrinsics #101886

Merged
merged 22 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
d341230
Remove HW_Flag_MultiIns in favor of using HW_Flag_SpecialCodeGen
tannergooding May 4, 2024
134a43b
Add a new flag HW_Flag_InvalidNodeId
tannergooding May 4, 2024
f0cc9dc
Change HW_Flag_EmbMaskingIncompatible to be HW_Flag_EmbMaskingCompatible
tannergooding May 4, 2024
ea8ab95
Mark various compare intrinsics with HW_Flag_NoEvexSemantics
tannergooding May 4, 2024
089ee48
Marking various intrinsics as EmbBroadcastCompatible, EmbMaskingCompa…
tannergooding May 4, 2024
2252608
Applying formatting patch
tannergooding May 5, 2024
067633e
Ensure WithLower/WithUpper are not marked as InvalidNodeId
tannergooding May 5, 2024
1cefd82
Ensure that instOptions are being passed down all relevant hwintrinsi…
tannergooding May 6, 2024
08039ed
Ensure the insOpts are plumbed through for EVEX instructions
tannergooding May 6, 2024
6014628
Ensure EVEX instructions are properly annotated with EmbeddedBroadcas…
tannergooding May 6, 2024
0c9b804
Ensure that embedded broadcast/masking is displayed in the disassembly
tannergooding May 6, 2024
ed3cd21
Applying formatting patch
tannergooding May 6, 2024
57efa0a
Updating the hwintrinsic tests to cover embedded broadcast/masking
tannergooding May 6, 2024
5e6e1af
Fix some handling in the JIT related to embedded broadcast/masking
tannergooding May 6, 2024
47d01d0
Fixup some tests where validating embedded masking is non-trivial
tannergooding May 7, 2024
40ffaa3
Cleanup some cases found by SPMI
tannergooding May 7, 2024
efe1127
Ensure that CompareLessThan has its operands swapped back if it's bein…
tannergooding May 7, 2024
40c0ce4
Don't regress a scenario around op_Equality and TYP_MASK
tannergooding May 8, 2024
0db9b67
Adjusting hardware intrinsic tests to test non-zero masks
tannergooding May 8, 2024
f83162d
Avoid some messiness around operand swapping
tannergooding May 8, 2024
37b9fb1
Ensure embedded masks mark TYP_SIMD16 and TYP_SIMD32 instructions as …
tannergooding May 8, 2024
e6e6272
Mark Sse2_r/Sse2_ro as AotIncompatible due to runtime/102037
tannergooding May 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 26 additions & 21 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -959,38 +959,35 @@ class CodeGen final : public CodeGenInterface
#ifdef FEATURE_HW_INTRINSICS
void genHWIntrinsic(GenTreeHWIntrinsic* node);
#if defined(TARGET_XARCH)
void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node,
instruction ins,
emitAttr attr,
regNumber reg,
GenTree* rmOp,
insOpts instOptions = INS_OPTS_NONE);
void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival);
void genHWIntrinsic_R_RM(
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber reg, GenTree* rmOp, insOpts instOptions);
void genHWIntrinsic_R_RM_I(
GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival, insOpts instOptions);
void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, insOpts instOptions);
void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival);
void genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
void genHWIntrinsic_R_R_RM_I(
GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival, insOpts instOptions);
void genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, insOpts instOptions);
void genHWIntrinsic_R_R_R_RM(instruction ins,
emitAttr attr,
regNumber targetReg,
regNumber op1Reg,
regNumber op2Reg,
GenTree* op3,
insOpts instOptions = INS_OPTS_NONE);
void genHWIntrinsic_R_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival);
insOpts instOptions);
void genHWIntrinsic_R_R_R_RM_I(
GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival, insOpts instOptions);

void genBaseIntrinsic(GenTreeHWIntrinsic* node);
void genX86BaseIntrinsic(GenTreeHWIntrinsic* node);
void genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genSSEIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genSSE41Intrinsic(GenTreeHWIntrinsic* node);
void genSSE42Intrinsic(GenTreeHWIntrinsic* node);
void genSSE41Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genSSE42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genAESIntrinsic(GenTreeHWIntrinsic* node);
void genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions);
void genPermuteVar2x(GenTreeHWIntrinsic* node);
void genPermuteVar2x(GenTreeHWIntrinsic* node, insOpts instOptions);
void genLZCNTIntrinsic(GenTreeHWIntrinsic* node);
void genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node);
void genPOPCNTIntrinsic(GenTreeHWIntrinsic* node);
void genXCNTIntrinsic(GenTreeHWIntrinsic* node, instruction ins);
void genX86SerializeIntrinsic(GenTreeHWIntrinsic* node);
Expand All @@ -1003,6 +1000,8 @@ class CodeGen final : public CodeGenInterface
HWIntrinsicSwitchCaseBody emitSwCase);

void genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* node, GenTree* lastOp);

static insOpts AddEmbBroadcastMode(insOpts instOptions);
#endif // defined(TARGET_XARCH)

#ifdef TARGET_ARM64
Expand Down Expand Up @@ -1576,16 +1575,22 @@ class CodeGen final : public CodeGenInterface
void inst_TT(instruction ins, emitAttr size, GenTree* op1);
void inst_RV_TT(instruction ins, emitAttr size, regNumber op1Reg, GenTree* op2);
void inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival);
void inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival);
void inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival, insOpts instOptions);
void inst_RV_RV_TT(instruction ins,
emitAttr size,
regNumber targetReg,
regNumber op1Reg,
GenTree* op2,
bool isRMW,
insOpts instOptions);
void inst_RV_RV_TT_IV(
instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, int8_t ival, bool isRMW);
void inst_RV_RV_TT_IV(instruction ins,
emitAttr size,
regNumber targetReg,
regNumber op1Reg,
GenTree* op2,
int8_t ival,
bool isRMW,
insOpts instOptions);
#endif

void inst_set_SV_var(GenTree* tree);
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3856,7 +3856,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP&
}
#elif defined(TARGET_XARCH)
// XORPS is the fastest and smallest way to initialize an XMM register to zero.
GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg);
GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg, INS_OPTS_NONE);
dblInitReg = reg;
#elif defined(TARGET_ARM64)
// We will just zero out the entire vector register. This sets it to a double/float zero value
Expand Down Expand Up @@ -3896,7 +3896,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP&
}
#elif defined(TARGET_XARCH)
// XORPS is the fastest and smallest way to initialize an XMM register to zero.
GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg);
GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg, INS_OPTS_NONE);
fltInitReg = reg;
#elif defined(TARGET_ARM64)
// We will just zero out the entire vector register. This sets it to a double/float zero value
Expand Down
Loading
Loading