Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix perf issues discovered in "For software performance, can you always trust inlining" blog #61408

Merged
merged 9 commits into from
Dec 7, 2021
4 changes: 1 addition & 3 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -894,12 +894,10 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
const bool makeInlineObservations = (compInlineResult != nullptr);
const bool isInlining = compIsForInlining();
const bool isPreJit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
const bool isTier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1);
unsigned retBlocks = 0;
int prefixFlags = 0;
bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan();
const bool resolveTokens = preciseScan && (isPreJit || isTier1);
const bool resolveTokens = preciseScan;
EgorBo marked this conversation as resolved.
Show resolved Hide resolved

// Track offsets where IL instructions begin in DEBUG builds. Used to
// validate debug info generated by the JIT.
Expand Down
63 changes: 46 additions & 17 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2620,29 +2620,58 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp)
unsigned level = 0;
unsigned lvl2 = 0;

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
if (multiOp->OperIs(GT_HWINTRINSIC) && (multiOp->GetOperandCount() == 1) &&
multiOp->AsHWIntrinsic()->OperIsMemoryLoadOrStore())
#if defined(FEATURE_HW_INTRINSICS)
if (multiOp->OperIs(GT_HWINTRINSIC))
{
costEx = IND_COST_EX;
costSz = 2;
GenTreeHWIntrinsic* hwTree = multiOp->AsHWIntrinsic();
#if defined(TARGET_XARCH)
if ((hwTree->GetOperandCount() == 1) && hwTree->OperIsMemoryLoadOrStore())
{
costEx = IND_COST_EX;
costSz = 2;

GenTree* addr = multiOp->Op(1)->gtEffectiveVal();
level = gtSetEvalOrder(addr);
GenTree* addr = hwTree->Op(1)->gtEffectiveVal();
level = gtSetEvalOrder(addr);

// See if we can form a complex addressing mode.
if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, multiOp->TypeGet()))
{
// Nothing to do, costs have been set.
// See if we can form a complex addressing mode.
if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, hwTree->TypeGet()))
{
// Nothing to do, costs have been set.
}
else
{
costEx += addr->GetCostEx();
costSz += addr->GetCostSz();
}

hwTree->SetCosts(costEx, costSz);
return level;
}
else
#endif
switch (hwTree->GetHWIntrinsicId())
{
costEx += addr->GetCostEx();
costSz += addr->GetCostSz();
#if defined(TARGET_XARCH)
case NI_Vector128_Create:
case NI_Vector256_Create:
#elif defined(TARGET_ARM64)
case NI_Vector64_Create:
case NI_Vector128_Create:
#endif
{
if ((hwTree->GetOperandCount() == 1) && hwTree->Op(1)->OperIsConst())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we only doing OperandCount == 1?

What about the cases where OperandCount == 2 through OperandCount == 32? Are those being properly tracked as "expensive" and getting CSEd?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tannergooding Yeah, they are automatically assigned a higher cost because of their multiple arguments, so the problem doesn't reproduce for them. But that's a good point: I guess Vector128.Create(1,2,3,4,5,6,7,8) currently gets a very high cost, while in reality it should still be 3/2.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's fine to log an issue for this and cover it in a separate PR.

{
// Vector.Create(cns) is cheap, but not so cheap that it should cost (1,1)
costEx = IND_COST_EX;
costSz = 2;
level = gtSetEvalOrder(hwTree->Op(1));
hwTree->SetCosts(costEx, costSz);
return level;
}
break;
}
default:
break;
}

multiOp->SetCosts(costEx, costSz);
return level;
}
#endif // defined(FEATURE_HW_INTRINSICS)

Expand Down