Skip to content

Commit

Permalink
[AArch64][SelectionDAG] Lower multiplication by a constant to shl+sub…
Browse files Browse the repository at this point in the history
…+shl+sub (llvm#90199)

Change the costmodel to lower a = b * C where C = 1 - (1 - 2^m) * 2^n to
              sub  w8, w0, w0, lsl #m
              sub  w0, w0, w8, lsl #n
Fix llvm#89430
  • Loading branch information
vfdff authored May 6, 2024
1 parent 2aaec48 commit e123643
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 2 deletions.
30 changes: 30 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17691,6 +17691,23 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return false;
};

// Can the const C be decomposed into (1 - (1 - 2^M) * 2^N), eg:
// C = 29 is equal to 1 - (1 - 2^3) * 2^2.
auto isPowMinusMinusOneConst = [](APInt C, APInt &M, APInt &N) {
APInt CVMinus1 = C - 1;
if (CVMinus1.isNegative())
return false;
unsigned TrailingZeroes = CVMinus1.countr_zero();
APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
if (CVPlus1.isPowerOf2()) {
unsigned BitWidth = CVPlus1.getBitWidth();
M = APInt(BitWidth, CVPlus1.logBase2());
N = APInt(BitWidth, TrailingZeroes);
return true;
}
return false;
};

if (ConstValue.isNonNegative()) {
// (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
// (mul x, 2^N - 1) => (sub (shl x, N), x)
Expand All @@ -17699,6 +17716,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// => MV = (add (shl x, M), x); (add (shl MV, N), MV)
// (mul x, (2^M + 1) * 2^N + 1))
// => MV = add (shl x, M), x); add (shl MV, N), x)
// (mul x, 1 - (1 - 2^M) * 2^N)
// => MV = (sub x, (shl x, M)); (sub x, (shl MV, N))
APInt SCVMinus1 = ShiftedConstValue - 1;
APInt SCVPlus1 = ShiftedConstValue + 1;
APInt CVPlus1 = ConstValue + 1;
Expand Down Expand Up @@ -17735,6 +17754,17 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return Add(Shl(MVal, CVN.getZExtValue()), N0);
}
}

if (Subtarget->hasALULSLFast() &&
isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
unsigned ShiftM = CVM.getZExtValue();
unsigned ShiftN = CVN.getZExtValue();
// ALULSLFast implies that shifts of <= 4 places are fast
if (ShiftM <= 4 && ShiftN <= 4) {
SDValue MVal = Sub(N0, Shl(N0, CVM.getZExtValue()));
return Sub(N0, Shl(MVal, CVN.getZExtValue()));
}
}
} else {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
Expand Down
75 changes: 73 additions & 2 deletions llvm/test/CodeGen/AArch64/mul_pow2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,23 @@ define i32 @test25_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
ret i32 %mul
}

; Positive test: 29 = 1 - (1 - 2^3) * 2^2. Both shift amounts (3 and 2) are
; <= 4, so the CHECK output is two subs with shifted operands instead of a
; mul. The GISEL output still materializes the constant and multiplies.
define i32 @test29_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test29_fast_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w0, lsl #3
; CHECK-NEXT: sub w0, w0, w8, lsl #2
; CHECK-NEXT: ret
;
; GISEL-LABEL: test29_fast_shift:
; GISEL: // %bb.0:
; GISEL-NEXT: mov w8, #29 // =0x1d
; GISEL-NEXT: mul w0, w0, w8
; GISEL-NEXT: ret

%mul = mul nsw i32 %x, 29 ; 29 = 1 - (1-8) * 4
ret i32 %mul
}

define i32 @test45_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test45_fast_shift:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -615,6 +632,42 @@ define i32 @test97_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
ret i32 %mul
}

; Negative test: 125 = 1 - (1 - 2^5) * 2^2. The inner shift amount (5) is
; larger than 4, so the expected output keeps the mov + mul sequence.
define i32 @test125_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test125_fast_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #125 // =0x7d
; CHECK-NEXT: mul w0, w0, w8
; CHECK-NEXT: ret
;
; GISEL-LABEL: test125_fast_shift:
; GISEL: // %bb.0:
; GISEL-NEXT: mov w8, #125 // =0x7d
; GISEL-NEXT: mul w0, w0, w8
; GISEL-NEXT: ret

%mul = mul nsw i32 %x, 125 ; 125 = 1 - ((1-32) << 2)
ret i32 %mul
}

; TODO: Constants of the form (1 - 2^M) * (1 - 2^N), such as
; 225 = (1 - 2^4) * (1 - 2^4), are not decomposed yet; the expected output
; below is still a plain mov + mul.
define i32 @test225_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test225_fast_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #225 // =0xe1
; CHECK-NEXT: mul w0, w0, w8
; CHECK-NEXT: ret
;
; GISEL-LABEL: test225_fast_shift:
; GISEL: // %bb.0:
; GISEL-NEXT: mov w8, #225 // =0xe1
; GISEL-NEXT: mul w0, w0, w8
; GISEL-NEXT: ret

%mul = mul nsw i32 %x, 225 ; 225 = (1-16)*(1-16)
ret i32 %mul
}

; Negative test: The shift amount 5 larger than 4
define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test297_fast_shift:
Expand All @@ -633,6 +686,24 @@ define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
ret i32 %mul
}

; Negative test: 481 = 1 - (1 - 2^4) * 2^5. The outer shift amount (5) is
; larger than 4, so the expected output keeps the mov + mul sequence.
define i32 @test481_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test481_fast_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #481 // =0x1e1
; CHECK-NEXT: mul w0, w0, w8
; CHECK-NEXT: ret
;
; GISEL-LABEL: test481_fast_shift:
; GISEL: // %bb.0:
; GISEL-NEXT: mov w8, #481 // =0x1e1
; GISEL-NEXT: mul w0, w0, w8
; GISEL-NEXT: ret

%mul = mul nsw i32 %x, 481 ; 481 = 1 - ((1-16) << 5)
ret i32 %mul
}

; Convert mul x, -pow2 to shift.
; Convert mul x, -(pow2 +/- 1) to shift + add/sub.
; Lowering other negative constants are not supported yet.
Expand Down Expand Up @@ -910,9 +981,9 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) {
;
; GISEL-LABEL: muladd_demand_commute:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI52_0
; GISEL-NEXT: adrp x8, .LCPI56_0
; GISEL-NEXT: movi v3.4s, #1, msl #16
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI52_0]
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI56_0]
; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s
; GISEL-NEXT: and v0.16b, v1.16b, v3.16b
; GISEL-NEXT: ret
Expand Down

0 comments on commit e123643

Please sign in to comment.