-
Notifications
You must be signed in to change notification settings - Fork 751
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64] - Fold and and cmp into tst (#110347)
Fixes llvm/llvm-project#102703. https://godbolt.org/z/nfj8xsb1Y

The following pattern:

```
%2 = and i32 %0, 254
%3 = icmp eq i32 %2, 0
```

is optimised by instcombine into:

```
%3 = icmp ult i32 %0, 2
```

However, the post-instcombine IR leads to worse AArch64 code than the unoptimised version.

Pre instcombine:

```
tst w0, #0xfe
cset w0, eq
ret
```

Post instcombine:

```
and w8, w0, #0xff
cmp w8, #2
cset w0, lo
ret
```

In the unoptimised version, SelectionDAG converts `SETCC (AND X 254) 0 EQ` into `CSEL 0 1 1 (ANDS X 254)`, which gets emitted as a `tst`.

In the optimised version, SelectionDAG converts `SETCC (AND X 255) 2 ULT` into `CSEL 0 1 2 (SUBS (AND X 255) 2)`, which gets emitted as an `and`/`cmp`.

This PR adds an optimisation to `AArch64ISelLowering`, converting `SETCC (AND X Y) Z ULT` into `SETCC (AND X (Y & ~(Z - 1))) 0 EQ` when `Z` is a power of two. This makes SelectionDAG/Codegen produce the same optimised code for both examples.
- Loading branch information
Showing
3 changed files
with
252 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s | ||
|
||
|
||
; i8 compared unsigned-less-than 8 (a power of two). | ||
; Expected output is a single `tst` of bits 3-7 (0xf8 = 0xff & ~(8 - 1)) | ||
; followed by `cset ... eq`, with no separate `and`/`cmp` pair. | ||
define i1 @lt8_u8(i8 %0) { | ||
; CHECK-LABEL: lt8_u8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: tst w0, #0xf8 | ||
; CHECK-NEXT: cset w0, eq | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i8 %0, 8 | ||
ret i1 %2 | ||
} | ||
|
||
; i8 compared unsigned-less-than 32 (a power of two). | ||
; Expected output is a single `tst` of bits 5-7 (0xe0 = 0xff & ~(32 - 1)) | ||
; followed by `cset ... eq`. | ||
define i1 @lt32_u8(i8 %0) { | ||
; CHECK-LABEL: lt32_u8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: tst w0, #0xe0 | ||
; CHECK-NEXT: cset w0, eq | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i8 %0, 32 | ||
ret i1 %2 | ||
} | ||
|
||
; i8 compared unsigned-less-than 64 (a power of two). | ||
; Expected output is a single `tst` of bits 6-7 (0xc0 = 0xff & ~(64 - 1)) | ||
; followed by `cset ... eq`. | ||
define i1 @lt64_u8(i8 %0) { | ||
; CHECK-LABEL: lt64_u8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: tst w0, #0xc0 | ||
; CHECK-NEXT: cset w0, eq | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i8 %0, 64 | ||
ret i1 %2 | ||
} | ||
|
||
; i32 compared unsigned-less-than 8. The IR contains no `and`, and the | ||
; expected output is the plain `cmp w0, #8` / `cset ... lo` pair. | ||
define i1 @lt8_u32(i32 %0) { | ||
; CHECK-LABEL: lt8_u32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: cmp w0, #8 | ||
; CHECK-NEXT: cset w0, lo | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i32 %0, 8 | ||
ret i1 %2 | ||
} | ||
|
||
; i32 compared unsigned-less-than 32. The IR contains no `and`, and the | ||
; expected output is the plain `cmp w0, #32` / `cset ... lo` pair. | ||
define i1 @lt32_u32(i32 %0) { | ||
; CHECK-LABEL: lt32_u32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: cmp w0, #32 | ||
; CHECK-NEXT: cset w0, lo | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i32 %0, 32 | ||
ret i1 %2 | ||
} | ||
|
||
; i32 compared unsigned-less-than 64. The IR contains no `and`, and the | ||
; expected output is the plain `cmp w0, #64` / `cset ... lo` pair. | ||
define i1 @lt64_u32(i32 %0) { | ||
; CHECK-LABEL: lt64_u32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: cmp w0, #64 | ||
; CHECK-NEXT: cset w0, lo | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i32 %0, 64 | ||
ret i1 %2 | ||
} | ||
|
||
; i64 compared unsigned-less-than 8. The IR contains no `and`, and the | ||
; expected output is the plain `cmp x0, #8` / `cset ... lo` pair. | ||
define i1 @lt8_u64(i64 %0) { | ||
; CHECK-LABEL: lt8_u64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: cmp x0, #8 | ||
; CHECK-NEXT: cset w0, lo | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i64 %0, 8 | ||
ret i1 %2 | ||
} | ||
|
||
; i64 compared unsigned-less-than 32. The IR contains no `and`, and the | ||
; expected output is the plain `cmp x0, #32` / `cset ... lo` pair. | ||
define i1 @lt32_u64(i64 %0) { | ||
; CHECK-LABEL: lt32_u64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: cmp x0, #32 | ||
; CHECK-NEXT: cset w0, lo | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i64 %0, 32 | ||
ret i1 %2 | ||
} | ||
|
||
; i64 compared unsigned-less-than 64. The IR contains no `and`, and the | ||
; expected output is the plain `cmp x0, #64` / `cset ... lo` pair. | ||
define i1 @lt64_u64(i64 %0) { | ||
; CHECK-LABEL: lt64_u64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: cmp x0, #64 | ||
; CHECK-NEXT: cset w0, lo | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i64 %0, 64 | ||
ret i1 %2 | ||
} | ||
|
||
; i8 masked with 5, then compared unsigned-less-than 16. | ||
; The mask has no bits at or above bit 4 (5 & ~(16 - 1) == 0), so the | ||
; comparison is always true; the expected output compares a zeroed | ||
; register against 0 rather than testing w0. | ||
define i1 @lt8_u16_and_5(i8 %0) { | ||
; CHECK-LABEL: lt8_u16_and_5: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w8, wzr | ||
; CHECK-NEXT: cmp w8, #0 | ||
; CHECK-NEXT: cset w0, eq | ||
; CHECK-NEXT: ret | ||
%2 = and i8 %0, 5 | ||
%3 = icmp ult i8 %2, 16 | ||
ret i1 %3 | ||
} | ||
|
||
; i8 masked with 19 (0b10011), then compared unsigned-less-than 16. | ||
; Only bit 4 of the mask can make the value reach 16 | ||
; (19 & ~(16 - 1) == 0x10), so the expected output is `tst w0, #0x10`. | ||
define i1 @lt8_u16_and_19(i8 %0) { | ||
; CHECK-LABEL: lt8_u16_and_19: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: tst w0, #0x10 | ||
; CHECK-NEXT: cset w0, eq | ||
; CHECK-NEXT: ret | ||
%2 = and i8 %0, 19 | ||
%3 = icmp ult i8 %2, 16 | ||
ret i1 %3 | ||
} | ||
|
||
; i32 masked with 7, then compared unsigned-less-than 16. | ||
; The mask has no bits at or above bit 4 (7 & ~(16 - 1) == 0), so the | ||
; comparison is always true; the expected output compares a zeroed | ||
; register against 0 rather than testing w0. | ||
define i1 @lt32_u16_and_7(i32 %0) { | ||
; CHECK-LABEL: lt32_u16_and_7: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w8, wzr | ||
; CHECK-NEXT: cmp w8, #0 | ||
; CHECK-NEXT: cset w0, eq | ||
; CHECK-NEXT: ret | ||
%2 = and i32 %0, 7 | ||
%3 = icmp ult i32 %2, 16 | ||
ret i1 %3 | ||
} | ||
|
||
; i32 masked with 21 (0b10101), then compared unsigned-less-than 16. | ||
; Only bit 4 of the mask can make the value reach 16 | ||
; (21 & ~(16 - 1) == 0x10), so the expected output is `tst w0, #0x10`. | ||
define i1 @lt32_u16_and_21(i32 %0) { | ||
; CHECK-LABEL: lt32_u16_and_21: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: tst w0, #0x10 | ||
; CHECK-NEXT: cset w0, eq | ||
; CHECK-NEXT: ret | ||
%2 = and i32 %0, 21 | ||
%3 = icmp ult i32 %2, 16 | ||
ret i1 %3 | ||
} | ||
|
||
; i64 masked with 9, then compared unsigned-less-than 16. | ||
; The mask has no bits at or above bit 4 (9 & ~(16 - 1) == 0), so the | ||
; comparison is always true; the expected output compares a zeroed | ||
; 64-bit register against 0 rather than testing x0. | ||
define i1 @lt64_u16_and_9(i64 %0) { | ||
; CHECK-LABEL: lt64_u16_and_9: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov x8, xzr | ||
; CHECK-NEXT: cmp x8, #0 | ||
; CHECK-NEXT: cset w0, eq | ||
; CHECK-NEXT: ret | ||
%2 = and i64 %0, 9 | ||
%3 = icmp ult i64 %2, 16 | ||
ret i1 %3 | ||
} | ||
|
||
; i64 masked with 23 (0b10111), then compared unsigned-less-than 16. | ||
; Only bit 4 of the mask can make the value reach 16 | ||
; (23 & ~(16 - 1) == 0x10), so the expected output is `tst x0, #0x10`. | ||
define i1 @lt64_u16_and_23(i64 %0) { | ||
; CHECK-LABEL: lt64_u16_and_23: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: tst x0, #0x10 | ||
; CHECK-NEXT: cset w0, eq | ||
; CHECK-NEXT: ret | ||
%2 = and i64 %0, 23 | ||
%3 = icmp ult i64 %2, 16 | ||
ret i1 %3 | ||
} | ||
|
||
; negative test | ||
; The bound 3 is not a power of two, so the expected output keeps the | ||
; separate `and w8, w0, #0xff` and `cmp w8, #3` instead of a `tst`. | ||
define i1 @lt3_u8(i8 %0) { | ||
; CHECK-LABEL: lt3_u8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: and w8, w0, #0xff | ||
; CHECK-NEXT: cmp w8, #3 | ||
; CHECK-NEXT: cset w0, lo | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i8 %0, 3 | ||
ret i1 %2 | ||
} | ||
|
||
; negative test | ||
; The bound 3 is not a power of two; the expected output is the plain | ||
; `cmp w0, #3` / `cset ... lo` pair. | ||
define i1 @lt3_u32(i32 %0) { | ||
; CHECK-LABEL: lt3_u32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: cmp w0, #3 | ||
; CHECK-NEXT: cset w0, lo | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i32 %0, 3 | ||
ret i1 %2 | ||
} | ||
|
||
; negative test | ||
; The bound 3 is not a power of two; the expected output is the plain | ||
; `cmp x0, #3` / `cset ... lo` pair. | ||
define i1 @lt3_u64(i64 %0) { | ||
; CHECK-LABEL: lt3_u64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: cmp x0, #3 | ||
; CHECK-NEXT: cset w0, lo | ||
; CHECK-NEXT: ret | ||
%2 = icmp ult i64 %0, 3 | ||
ret i1 %2 | ||
} | ||
|
||
; negative test | ||
; The `and` result (%2) is used twice: by the `icmp ult ..., 16` and by | ||
; the `add`. The expected output keeps the separate `and` + `cmp #16` | ||
; instead of a `tst`. | ||
; The `add ..., 10` shows up as an `orr` in the expected output; 21 | ||
; (0b10101) and 10 (0b01010) share no set bits, so add and or agree here. | ||
define i32 @lt32_u16_multiple_use(i32 %0) { | ||
; CHECK-LABEL: lt32_u16_multiple_use: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w8, #21 // =0x15 | ||
; CHECK-NEXT: mov w9, #10 // =0xa | ||
; CHECK-NEXT: and w8, w0, w8 | ||
; CHECK-NEXT: cmp w8, #16 | ||
; CHECK-NEXT: orr w8, w8, w9 | ||
; CHECK-NEXT: cset w10, lo | ||
; CHECK-NEXT: mul w0, w8, w10 | ||
; CHECK-NEXT: ret | ||
%2 = and i32 %0, 21 | ||
%3 = icmp ult i32 %2, 16 | ||
%4 = add i32 %2, 10 | ||
%5 = zext i1 %3 to i32 | ||
%6 = mul i32 %4, %5 | ||
ret i32 %6 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters