Skip to content

Commit

Permalink
[AArch64] prevent (shl (srl x, c1), c2) -> (and (shift x, c3)) when load
Browse files Browse the repository at this point in the history
Currently, `DAGCombiner` replaces the shift pair
`(shl (srl x, c1), c2)` with an equivalent shift-and-`AND` sequence.

However, in certain cases such as `(shl (srl x, c1), 2)`, the transform
to `AND` is not needed if the result is used as the address of a `Load`.

Consider the following case:
```
        lsr x8, x8, #56
        and x8, x8, #0xfc
        ldr w0, [x2, x8]
        ret
```

In this case, we can remove the `AND` by changing the address operand of
`LDR` to `[X2, X8, LSL #2]` and changing the right-shift amount from 56 to 58.

After the change:
```
        lsr x8, x8, #58
        ldr w0, [x2, x8, lsl #2]
        ret
```

This patch checks whether a `(shl (srl x, c1), 2)` operation that feeds a
`load` address can be left as-is instead of being transformed to `AND`.
  • Loading branch information
ParkHanbum committed Jun 6, 2024
1 parent 1b500fb commit b380592
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 25 deletions.
17 changes: 17 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16918,6 +16918,23 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
}

// We do not need to fold when this shift is used in a specific load case:
// (ldr x, (add x, (shl (srl x, c1) 2)))
if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
unsigned ShlAmt = C2->getZExtValue();
if (auto ShouldADD = *N->use_begin();
ShouldADD->getOpcode() == ISD::ADD && ShouldADD->hasOneUse()) {
if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->use_begin())) {
unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
if ((1 << ShlAmt) == ByteVT &&
isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
return false;
}
}
}
}

return true;
}

Expand Down
43 changes: 18 additions & 25 deletions llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ define i16 @load16_shr63(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load16_shr63:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #62
; CHECK-NEXT: and x8, x8, #0x2
; CHECK-NEXT: ldrh w0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #63
; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand All @@ -22,9 +21,8 @@ define i16 @load16_shr2(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load16_shr2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #1
; CHECK-NEXT: and x8, x8, #0x7ffffffffffffffe
; CHECK-NEXT: ldrh w0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #2
; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand All @@ -38,8 +36,8 @@ define i16 @load16_shr1(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load16_shr1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: and x8, x8, #0xfffffffffffffffe
; CHECK-NEXT: ldrh w0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #1
; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand All @@ -53,9 +51,8 @@ define i32 @load32_shr63(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load32_shr63:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #61
; CHECK-NEXT: and x8, x8, #0x4
; CHECK-NEXT: ldr w0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #63
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand All @@ -69,8 +66,8 @@ define i32 @load32_shr2(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load32_shr2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: and x8, x8, #0xfffffffffffffffc
; CHECK-NEXT: ldr w0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #2
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand All @@ -84,9 +81,8 @@ define i32 @load32_shr1(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load32_shr1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: and x8, x8, #0xfffffffffffffffc
; CHECK-NEXT: ldr w0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #1
; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand All @@ -100,9 +96,8 @@ define i64 @load64_shr63(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load64_shr63:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsr x8, x8, #60
; CHECK-NEXT: and x8, x8, #0x8
; CHECK-NEXT: ldr x0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #63
; CHECK-NEXT: ldr x0, [x2, x8, lsl #3]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand All @@ -116,9 +111,8 @@ define i64 @load64_shr2(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load64_shr2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: and x8, x8, #0xfffffffffffffff8
; CHECK-NEXT: ldr x0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #2
; CHECK-NEXT: ldr x0, [x2, x8, lsl #3]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand All @@ -132,9 +126,8 @@ define i64 @load64_shr1(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load64_shr1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: and x8, x8, #0xfffffffffffffff8
; CHECK-NEXT: ldr x0, [x2, x8]
; CHECK-NEXT: lsr x8, x8, #1
; CHECK-NEXT: ldr x0, [x2, x8, lsl #3]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
Expand Down

0 comments on commit b380592

Please sign in to comment.