Skip to content

Commit

Permalink
[AArch64] prevent (shl (srl x, c1), c2) -> (and (shift x, c3)) when load
Browse files Browse the repository at this point in the history
Currently, `DAGCombiner` replaces the shift pair
`(shl (srl x, c1), c2)` with an `AND` of a single shift.

However, in certain cases such as `(shl (srl x, c1), 2)`, the transform
to `AND` is not needed when the result is used as the address of a `load`.

Consider following case:
```
        lsr x8, x8, #56
        and x8, x8, #0xfc
        ldr w0, [x2, x8]
        ret
```

In this case, we can remove the `AND` by changing the address operand of
`LDR` to `[X2, X8, LSL #2]` and changing the right-shift amount from 56 to 58.

After the change:
```
        lsr x8, x8, #58
        ldr w0, [x2, x8, lsl #2]
        ret
```

This patch checks whether a `(shl (srl x, c1), 2)` that feeds the address of
a `load` can skip the transform to `AND`.
  • Loading branch information
ParkHanbum committed May 31, 2024
1 parent 5251d57 commit 378b159
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 9 deletions.
48 changes: 47 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ namespace {
SDValue visitFMULForFMADistributiveCombine(SDNode *N);

SDValue XformToShuffleWithZero(SDNode *N);
/// Returns true when \p N is a single-use (shl (srl x, c1), 2) whose user is
/// an ADD feeding a load; visitSHL then skips folding the shift pair into a
/// mask.
bool isCanBeLoadedWithLsl(SDNode *N);
bool reassociationCanBreakAddressingModePattern(unsigned Opc,
const SDLoc &DL,
SDNode *N,
Expand Down Expand Up @@ -9893,7 +9894,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// folding this will increase the total number of instructions.
if (N0.getOpcode() == ISD::SRL &&
(N0.getOperand(1) == N1 || N0.hasOneUse()) &&
TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
TLI.shouldFoldConstantShiftPairToMask(N, Level) &&
!isCanBeLoadedWithLsl(N)) {
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
/*AllowUndefs*/ false,
/*AllowTypeMismatch*/ true)) {
Expand Down Expand Up @@ -28338,6 +28340,50 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
return false;
}

/// Decide whether the shift pair rooted at \p N should be left alone because
/// it can be absorbed into a load's addressing mode.
///
/// The pattern recognised is
///   (load (add base, (shl (srl x, c1), 2)))
/// where the SHL has exactly one user (the ADD) and the ADD has exactly one
/// user (the load).
///
/// \param N the SHL node being visited.
/// \returns true if the pattern matches, the target reports the addressing
///          mode built below as legal for the load's memory type, and
///          PRE_INC indexed loads are legal for that type; false otherwise.
bool DAGCombiner::isCanBeLoadedWithLsl(SDNode *N) {
  // The shifted value must have a single consumer, otherwise keeping the
  // shift pair cannot be justified by that one consumer alone.
  if (!N->hasOneUse())
    return false;

  // SrlAmt only receives the matched constant; its value is not used further.
  APInt SrlAmt;
  if (!sd_match(N, m_Shl(m_Srl(m_Value(), m_ConstInt(SrlAmt)),
                         m_SpecificInt(2))))
    return false;

  // Check the use chain: (ldr x, (add x, (shl (srl x, c1) 2))).
  // These structural checks are cheap, so do them before the known-bits query.
  SDNode *User = N->use_begin().getUse().getUser();
  if (!User || User->getOpcode() != ISD::ADD)
    return false;

  // Only look through the ADD when it has exactly one user. With several
  // users, inspecting just the first one would be arbitrary, and with none,
  // use_begin() must not be dereferenced.
  if (!User->hasOneUse())
    return false;

  SDNode *AddUser = User->use_begin().getUse().getUser();
  auto *LoadN = AddUser ? dyn_cast<LoadSDNode>(AddUser) : nullptr;
  if (!LoadN)
    return false;

  // Known bits of the shifted value give an upper bound for the offset.
  SDValue ShlV = SDValue(N, 0);
  unsigned RegSize = ShlV.getValueType().getScalarSizeInBits();
  KnownBits Known = DAG.computeKnownBits(ShlV);
  if (Known.getBitWidth() != RegSize)
    return false;

  // getZExtValue() asserts when the value needs more than 64 bits (e.g. for
  // i128 shifts), so bail out instead of crashing.
  APInt MaxOffset = Known.getMaxValue();
  if (!MaxOffset.isIntN(64))
    return false;

  // NOTE(review): the shift amount is fixed at 2 but the load's access size
  // is not compared against it; confirm whether non-4-byte loads should be
  // rejected here.
  TargetLoweringBase::AddrMode AM;
  AM.HasBaseReg = true;
  AM.BaseOffs = MaxOffset.getZExtValue();
  EVT VT = LoadN->getMemoryVT();
  unsigned AS = LoadN->getAddressSpace();
  Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
  if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
    return false;

  return TLI.isIndexedLoadLegal(ISD::PRE_INC, VT);
}

/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
CodeGenOptLevel OptLevel) {
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ define i32 @load_shr63(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load_shr63:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #61
; CHECK-NEXT: and x8, x8, #0x4
; CHECK-NEXT: ldr w0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #63
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand All @@ -35,8 +34,8 @@ define i32 @load_shr2(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load_shr2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: and x8, x8, #0xfffffffffffffffc
; CHECK-NEXT: ldr w0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #2
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand All @@ -50,9 +49,8 @@ define i32 @load_shr1(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load_shr1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: and x8, x8, #0xfffffffffffffffc
; CHECK-NEXT: ldr w0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #1
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand Down

0 comments on commit 378b159

Please sign in to comment.