From 73b1897c7d9d59a9b4575f5fd18bb038cc94b85e Mon Sep 17 00:00:00 2001 From: Will Smith Date: Mon, 14 Mar 2022 11:29:12 -0700 Subject: [PATCH] ARM64 - Optimizing a % b operations (#65535) * Initial work for ARM64 mod optimization * Updated comment * Updated comment * Updated comment * Fixing build * Remove uneeded var * Use '%' morph logic for both x64/arm64 * Adding back in divisor check for x64 * Formatting * Update comments * Update comments * Fixing * Updated comment * Updated comment * Tweaking x64 transformation logic for the mod opt * Tweaking x64 transformation logic for the mod opt * Using IntCon * Fixed build * Minor tweak * Fixing x64 diffs * Removing flag set * Feedback * Fixing build * Feedback * Fixing tests * Fixing tests * Fixing tests * Formatting * Fixing tests * Feedback * Fixing build --- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/gentree.h | 47 +++++++++++++++++++ src/coreclr/jit/morph.cpp | 93 +++++++++++++++++++++----------------- 3 files changed, 100 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 20abbab915642..291f0f9c758a1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6489,6 +6489,7 @@ class Compiler GenTree* fgPropagateCommaThrow(GenTree* parent, GenTreeOp* commaThrow, GenTreeFlags precedingSideEffects); GenTree* fgMorphRetInd(GenTreeUnOp* tree); GenTree* fgMorphModToSubMulDiv(GenTreeOp* tree); + GenTree* fgMorphUModToAndSub(GenTreeOp* tree); GenTree* fgMorphSmpOpOptional(GenTreeOp* tree); GenTree* fgMorphMultiOp(GenTreeMultiOp* multiOp); GenTree* fgMorphConst(GenTree* tree); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index af58071a37d20..d215b4eee7678 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -2110,6 +2110,10 @@ struct GenTree inline bool IsIntegralConst() const; + inline bool IsIntegralConstUnsignedPow2() const; + + inline bool IsIntegralConstAbsPow2() const; + inline bool IsIntCnsFitsInI32(); // Constant fits in INT32 inline bool IsCnsFltOrDbl() const; @@ -8343,6 +8347,49 @@ inline bool GenTree::IsIntegralConst() const #endif // !TARGET_64BIT } +//------------------------------------------------------------------------- +// IsIntegralConstUnsignedPow2: Determines whether the unsigned value of +// an integral constant is the power of 2. +// +// Return Value: +// Returns true if the unsigned value of a GenTree's integral constant +// is the power of 2. +// +// Notes: +// Integral constant nodes store its value in signed form. +// This should handle cases where an unsigned-int was logically used in +// user code. +// +inline bool GenTree::IsIntegralConstUnsignedPow2() const +{ + if (IsIntegralConst()) + { + return isPow2((UINT64)AsIntConCommon()->IntegralValue()); + } + + return false; +} + +//------------------------------------------------------------------------- +// IsIntegralConstAbsPow2: Determines whether the absolute value of +// an integral constant is the power of 2. +// +// Return Value: +// Returns true if the absolute value of a GenTree's integral constant +// is the power of 2. +// +inline bool GenTree::IsIntegralConstAbsPow2() const +{ + if (IsIntegralConst()) + { + INT64 svalue = AsIntConCommon()->IntegralValue(); + size_t value = (svalue == SSIZE_T_MIN) ? static_cast(svalue) : static_cast(abs(svalue)); + return isPow2(value); + } + + return false; +} + // Is this node an integer constant that fits in a 32-bit signed integer (INT32) inline bool GenTree::IsIntCnsFitsInI32() { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 0dfe3674ddaa3..ba331b8511ad8 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -11442,58 +11442,34 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) #endif #endif // !TARGET_64BIT -#ifdef TARGET_ARM64 - // For ARM64 we don't have a remainder instruction, - // The architecture manual suggests the following transformation to - // generate code for such operator: - // - // a % b = a - (a / b) * b; - // - // TODO: there are special cases where it can be done better, for example - // when the modulo operation is unsigned and the divisor is a - // integer constant power of two. In this case, we can make the transform: - // - // a % b = a & (b - 1); - // - // Lower supports it for all cases except when `a` is constant, but - // in Morph we can't guarantee that `a` won't be transformed into a constant, - // so can't guarantee that lower will be able to do this optimization. + if (!optValnumCSE_phase) { - // Do "a % b = a - (a / b) * b" morph always, see TODO before this block. - bool doMorphModToSubMulDiv = true; - - if (doMorphModToSubMulDiv) +#ifdef TARGET_ARM64 + if (tree->OperIs(GT_UMOD) && op2->IsIntegralConstUnsignedPow2()) { - assert(!optValnumCSE_phase); - - tree = fgMorphModToSubMulDiv(tree->AsOp()); + // Transformation: a % b = a & (b - 1); + tree = fgMorphUModToAndSub(tree->AsOp()); op1 = tree->AsOp()->gtOp1; op2 = tree->AsOp()->gtOp2; } - } -#else // !TARGET_ARM64 - // If b is not a power of 2 constant then lowering replaces a % b - // with a - (a / b) * b and applies magic division optimization to - // a / b. The code may already contain an a / b expression (e.g. - // x = a / 10; y = a % 10;) and then we end up with redundant code. - // If we convert % to / here we give CSE the opportunity to eliminate - // the redundant division. If there's no redundant division then - // nothing is lost, lowering would have done this transform anyway. - - if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst())) - { - ssize_t divisorValue = op2->AsIntCon()->IconValue(); - size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast(divisorValue) - : static_cast(abs(divisorValue)); - - if (!isPow2(absDivisorValue)) + // ARM64 architecture manual suggests this transformation + // for the mod operator. + else +#else + // XARCH only applies this transformation if we know + // that magic division will be used - which is determined + // when 'b' is not a power of 2 constant and mod operator is signed. + // Lowering for XARCH does this optimization already, + // but is also done here to take advantage of CSE. + if (tree->OperIs(GT_MOD) && op2->IsIntegralConst() && !op2->IsIntegralConstAbsPow2()) +#endif { + // Transformation: a % b = a - (a / b) * b; tree = fgMorphModToSubMulDiv(tree->AsOp()); op1 = tree->AsOp()->gtOp1; op2 = tree->AsOp()->gtOp2; } } -#endif // !TARGET_ARM64 break; USE_HELPER_FOR_ARITH: @@ -14784,6 +14760,41 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) return sub; } +//------------------------------------------------------------------------ +// fgMorphUModToAndSub: Transform a % b into the equivalent a & (b - 1). +// '%' must be unsigned (GT_UMOD). +// 'a' and 'b' must be integers. +// 'b' must be a constant and a power of two. +// +// Arguments: +// tree - The GT_UMOD tree to morph +// +// Returns: +// The morphed tree +// +// Notes: +// This is more optimized than calling fgMorphModToSubMulDiv. +// +GenTree* Compiler::fgMorphUModToAndSub(GenTreeOp* tree) +{ + JITDUMP("\nMorphing UMOD [%06u] to And/Sub\n", dspTreeID(tree)); + + assert(tree->OperIs(GT_UMOD)); + assert(tree->gtOp2->IsIntegralConstUnsignedPow2()); + + const var_types type = tree->TypeGet(); + + const size_t cnsValue = (static_cast(tree->gtOp2->AsIntConCommon()->IntegralValue())) - 1; + GenTree* const newTree = gtNewOperNode(GT_AND, type, tree->gtOp1, gtNewIconNode(cnsValue, type)); + + INDEBUG(newTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + + DEBUG_DESTROY_NODE(tree->gtOp2); + DEBUG_DESTROY_NODE(tree); + + return newTree; +} + //------------------------------------------------------------------------------ // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree. //