diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp
index 7fee192a419f2..9b621a1a816d1 100644
--- a/src/coreclr/jit/codegenarmarch.cpp
+++ b/src/coreclr/jit/codegenarmarch.cpp
@@ -2637,18 +2637,16 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
         srcReg = REG_ZR;
     }
 
-    regNumber dstReg                     = dstAddrBaseReg;
-    int       dstRegAddrAlignment        = 0;
-    bool      isDstRegAddrAlignmentKnown = false;
+    regNumber dstReg              = dstAddrBaseReg;
+    int       dstRegAddrAlignment = 0;
 
     if (dstLclNum != BAD_VAR_NUM)
     {
         bool      fpBased;
         const int baseAddr = compiler->lvaFrameAddress(dstLclNum, &fpBased);
 
-        dstReg                     = fpBased ? REG_FPBASE : REG_SPBASE;
-        dstRegAddrAlignment        = fpBased ? (genSPtoFPdelta() % 16) : 0;
-        isDstRegAddrAlignmentKnown = true;
+        dstReg              = fpBased ? REG_FPBASE : REG_SPBASE;
+        dstRegAddrAlignment = fpBased ? (genSPtoFPdelta() % 16) : 0;
 
         helper.SetDstOffset(baseAddr + dstOffset);
     }
@@ -2670,11 +2668,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
 
     bool shouldUse16ByteWideInstrs = false;
 
-    // Store operations that cross a 16-byte boundary reduce bandwidth or incur additional latency.
-    // The following condition prevents using 16-byte stores when dstRegAddrAlignment is:
-    // 1) unknown (i.e. dstReg is neither FP nor SP) or
-    // 2) non-zero (i.e. dstRegAddr is not 16-byte aligned).
-    const bool hasAvailableSimdReg = isDstRegAddrAlignmentKnown && (size > FP_REGSIZE_BYTES);
+    const bool hasAvailableSimdReg = (size > FP_REGSIZE_BYTES);
     const bool canUse16ByteWideInstrs =
         hasAvailableSimdReg && (dstRegAddrAlignment == 0) && helper.CanEncodeAllOffsets(FP_REGSIZE_BYTES);
 
@@ -2825,35 +2819,26 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
 
 #ifdef TARGET_ARM64
     CopyBlockUnrollHelper helper(srcOffset, dstOffset, size);
-
-    regNumber srcReg                     = srcAddrBaseReg;
-    int       srcRegAddrAlignment        = 0;
-    bool      isSrcRegAddrAlignmentKnown = false;
+    regNumber srcReg = srcAddrBaseReg;
 
     if (srcLclNum != BAD_VAR_NUM)
     {
         bool      fpBased;
         const int baseAddr = compiler->lvaFrameAddress(srcLclNum, &fpBased);
 
-        srcReg                     = fpBased ? REG_FPBASE : REG_SPBASE;
-        srcRegAddrAlignment        = fpBased ? (genSPtoFPdelta() % 16) : 0;
-        isSrcRegAddrAlignmentKnown = true;
+        srcReg = fpBased ? REG_FPBASE : REG_SPBASE;
 
         helper.SetSrcOffset(baseAddr + srcOffset);
     }
 
-    regNumber dstReg                     = dstAddrBaseReg;
-    int       dstRegAddrAlignment        = 0;
-    bool      isDstRegAddrAlignmentKnown = false;
+    regNumber dstReg = dstAddrBaseReg;
 
     if (dstLclNum != BAD_VAR_NUM)
    {
         bool      fpBased;
         const int baseAddr = compiler->lvaFrameAddress(dstLclNum, &fpBased);
 
-        dstReg                     = fpBased ? REG_FPBASE : REG_SPBASE;
-        dstRegAddrAlignment        = fpBased ? (genSPtoFPdelta() % 16) : 0;
-        isDstRegAddrAlignmentKnown = true;
+        dstReg = fpBased ? REG_FPBASE : REG_SPBASE;
 
         helper.SetDstOffset(baseAddr + dstOffset);
     }
@@ -2914,8 +2899,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
     // known and the block size is larger than a single SIMD register size (i.e. when using SIMD instructions can
     // be profitable).
 
-    const bool canUse16ByteWideInstrs = isSrcRegAddrAlignmentKnown && isDstRegAddrAlignmentKnown &&
-                                        (size >= 2 * FP_REGSIZE_BYTES) && (srcRegAddrAlignment == dstRegAddrAlignment);
+    const bool canUse16ByteWideInstrs = (size >= 2 * FP_REGSIZE_BYTES);
 
     bool shouldUse16ByteWideInstrs = false;
 
diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp
index efa0de76761aa..7ce916d0a305c 100644
--- a/src/coreclr/jit/lsraarmarch.cpp
+++ b/src/coreclr/jit/lsraarmarch.cpp
@@ -212,7 +212,7 @@ int LinearScan::BuildCall(GenTreeCall* call)
         // the target. We do not handle these constraints on the same
         // refposition too well so we help ourselves a bit here by forcing the
         // null check with LR.
-        regMaskTP candidates = call->IsFastTailCall() ? RBM_LR : 0;
+        regMaskTP candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE;
         buildInternalIntRegisterDefForNode(call, candidates);
     }
 #endif // TARGET_ARM
@@ -632,9 +632,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                 buildInternalIntRegisterDefForNode(blkNode);
             }
-            const bool isDstRegAddrAlignmentKnown = dstAddr->OperIsLocalAddr();
-
-            if (isDstRegAddrAlignmentKnown && (size > FP_REGSIZE_BYTES))
+            if (size > FP_REGSIZE_BYTES)
             {
                 // For larger block sizes CodeGen can choose to use 16-byte SIMD instructions.
                 buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
             }
@@ -710,10 +708,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                                           ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr());
             const bool isDstAddrLocal = dstAddr->OperIsLocalAddr();
 
-            // CodeGen can use 16-byte SIMD ldp/stp for larger block sizes
-            // only when both source and destination base address registers have known alignment.
+            // CodeGen can use 16-byte SIMD ldp/stp for larger block sizes.
             // This is the case, when both registers are either sp or fp.
-            bool canUse16ByteWideInstrs = isSrcAddrLocal && isDstAddrLocal && (size >= 2 * FP_REGSIZE_BYTES);
+            bool canUse16ByteWideInstrs = (size >= 2 * FP_REGSIZE_BYTES);
 
             // Note that the SIMD registers allocation is speculative - LSRA doesn't know at this point
             // whether CodeGen will use SIMD registers (i.e. if such instruction sequence will be more optimal).
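Note: as a quick reference, the predicates this patch relaxes can be summarized in a
small standalone C++ sketch. This is not JIT code: kFpRegSizeBytes and the function
names below are illustrative stand-ins for FP_REGSIZE_BYTES and the inline conditions
in the hunks above, assuming an ARM64 target where a SIMD register is 16 bytes wide.

#include <cstdio>

constexpr int kFpRegSizeBytes = 16; // stand-in for FP_REGSIZE_BYTES on ARM64

// Before: InitBlk unrolling reserved a SIMD register only when the destination base
// register was fp/sp (alignment known) and the block exceeded one SIMD register.
bool initBlkUse16ByteStoresOld(int size, bool alignmentKnown, int alignment)
{
    return alignmentKnown && (size > kFpRegSizeBytes) && (alignment == 0);
}

// After: the "alignment known" gate is dropped. An arbitrary base register keeps the
// default alignment of 0, so only the size and residual alignment checks remain
// (CodeGen still verifies offset encodability via helper.CanEncodeAllOffsets).
bool initBlkUse16ByteStoresNew(int size, int alignment)
{
    return (size > kFpRegSizeBytes) && (alignment == 0);
}

// Before: CpBlk unrolling required both base registers to be fp/sp with matching
// alignment before using 16-byte SIMD ldp/stp.
bool cpBlkUse16ByteInstrsOld(int size, bool srcKnown, bool dstKnown, int srcAlign, int dstAlign)
{
    return srcKnown && dstKnown && (size >= 2 * kFpRegSizeBytes) && (srcAlign == dstAlign);
}

// After: the decision is purely size-based.
bool cpBlkUse16ByteInstrsNew(int size)
{
    return size >= 2 * kFpRegSizeBytes;
}

int main()
{
    // A 32-byte copy through arbitrary (non fp/sp) base registers:
    // rejected by the old predicate, allowed by the new one.
    printf("cpBlk old=%d new=%d\n", cpBlkUse16ByteInstrsOld(32, false, false, 0, 0), cpBlkUse16ByteInstrsNew(32));
    return 0;
}

The LSRA hunks mirror the same relaxation: the internal SIMD register is now reserved
speculatively whenever the size threshold is met, consistent with the existing note in
BuildBlockStore that the SIMD register allocation is speculative until CodeGen decides.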