Skip to content

Commit

Permalink
[JumpThreading] Thread over BB with only an unconditional branch (#86312)
Browse files Browse the repository at this point in the history

Fixes #76609
This patch does:
- relax the phis constraint in `CanRedirectPredsOfEmptyBBToSucc`
- guarantee that BB has multiple different predecessors to redirect, so
that we can handle the case without phis in BB. Without this change, and
with the phi constraint relaxed, we might redirect the CommonPred.

The motivation is consistent with JumpThreading. We always want the
branch to jump more directly to the destination, without passing through
the intermediate block. In this way, we can expose more optimization
opportunities.

An obvious example, proposed by @dtcxzyw, is:
```llvm
define i32 @test(...) {
entry:
   br i1 %c, label %do.end, label %if.then

if.then:                                          ; preds = %entry
   %call2 = call i32 @dummy()
   %tobool3.not = icmp eq i32 %call2, 0
   br i1 %tobool3.not, label %do.end, label %return

do.end:                                           ; preds = %entry, %if.then
   br label %return

return:                                           ; preds = %if.then, %do.end
   %retval.0 = phi i32 [ 0, %do.end ], [ %call2, %if.then ]
   ret i32 %retval.0
}
```
`entry` can directly jump to return, without passing `do.end`, and then
the if-else pattern can be simplified further:
```llvm
define i32 @test(...) {
entry:
   br i1 %c, label %return, label %if.then

if.then:                                          ; preds = %entry
   %call2 = call i32 @dummy()
   br label %return

return:                                           ; preds = %entry, %if.then
   %retval.0 = phi i32 [ 0, %entry ], [ %call2, %if.then ]
   ret i32 %retval.0
}
```
  • Loading branch information
XChy authored Apr 16, 2024
1 parent 1120d8e commit 36b3c26
Show file tree
Hide file tree
Showing 9 changed files with 284 additions and 126 deletions.
10 changes: 6 additions & 4 deletions llvm/lib/Transforms/Utils/Local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1019,12 +1019,14 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
const SmallPtrSetImpl<BasicBlock *> &SuccPreds,
BasicBlock *&CommonPred) {

// There must be phis in BB, otherwise BB will be merged into Succ directly
if (BB->phis().empty() || Succ->phis().empty())
// When Succ has no phis, BB may be merged into Succ directly. We don't need
// to redirect the predecessors of BB in this case.
if (Succ->phis().empty())
return false;

// BB must have predecessors not shared that can be redirected to Succ
if (!BB->hasNPredecessorsOrMore(2))
// BB must have multiple different predecessors, so that at least one of
// predecessors can be redirected to Succ, except the common predecessor.
if (BB->getUniquePredecessor() || pred_empty(BB))
return false;

// Get single common predecessors of both BB and Succ
Expand Down
9 changes: 4 additions & 5 deletions llvm/test/CodeGen/AArch64/and-sink.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,14 @@
define dso_local i32 @and_sink1(i32 %a, i1 %c) {
; CHECK-LABEL: and_sink1:
; CHECK: // %bb.0:
; CHECK-NEXT: tbz w1, #0, .LBB0_3
; CHECK-NEXT: tbz w1, #0, .LBB0_2
; CHECK-NEXT: // %bb.1: // %bb0
; CHECK-NEXT: tst w0, #0x4
; CHECK-NEXT: adrp x8, A
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: str wzr, [x8, :lo12:A]
; CHECK-NEXT: tbnz w0, #2, .LBB0_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3: // %bb2
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret

Expand Down
122 changes: 50 additions & 72 deletions llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ define i32 @combine_gt_ge_10() #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #10
; CHECK-NEXT: ldr w9, [x8]
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: cmp w9, #10
; CHECK-NEXT: b.le .LBB0_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x9, :got:c
Expand All @@ -29,18 +29,17 @@ define i32 @combine_gt_ge_10() #0 {
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3: // %lor.lhs.false
; CHECK-NEXT: b.lt .LBB0_6
; CHECK-NEXT: cmp w9, #10
; CHECK-NEXT: b.lt .LBB0_5
; CHECK-NEXT: .LBB0_4: // %land.lhs.true3
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB0_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_6: // %if.end
; CHECK-NEXT: .LBB0_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -145,10 +144,10 @@ define i32 @combine_lt_ge_5() #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #5
; CHECK-NEXT: ldr w9, [x8]
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: cmp w9, #5
; CHECK-NEXT: b.ge .LBB2_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x9, :got:c
Expand All @@ -161,18 +160,17 @@ define i32 @combine_lt_ge_5() #0 {
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_3: // %lor.lhs.false
; CHECK-NEXT: b.gt .LBB2_6
; CHECK-NEXT: cmp w9, #5
; CHECK-NEXT: b.gt .LBB2_5
; CHECK-NEXT: .LBB2_4: // %land.lhs.true3
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB2_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_6: // %if.end
; CHECK-NEXT: .LBB2_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -499,24 +497,17 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
; CHECK-NEXT: // %bb.3: // %while.cond.while.end_crit_edge
; CHECK-NEXT: ldr w8, [x19]
; CHECK-NEXT: .LBB7_4: // %while.end
; CHECK-NEXT: cmp w8, #1
; CHECK-NEXT: b.gt .LBB7_7
; CHECK-NEXT: // %bb.5: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB7_7
; CHECK-NEXT: // %bb.6:
; CHECK-NEXT: mov w0, #123 // =0x7b
; CHECK-NEXT: b .LBB7_8
; CHECK-NEXT: .LBB7_7: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: .LBB7_8: // %return
; CHECK-NEXT: adrp x9, :got:b
; CHECK-NEXT: adrp x10, :got:d
; CHECK-NEXT: ldr x9, [x9, :got_lo12:b]
; CHECK-NEXT: ldr x10, [x10, :got_lo12:d]
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: ldr w10, [x10]
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: ccmp w8, #2, #0, eq
; CHECK-NEXT: mov w8, #123 // =0x7b
; CHECK-NEXT: csel w0, w8, wzr, lt
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
Expand Down Expand Up @@ -564,52 +555,42 @@ return: ; preds = %if.end, %land.lhs.t
define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: b.gt .LBB8_3
; CHECK-NEXT: b.gt .LBB8_4
; CHECK-NEXT: // %bb.1: // %while.body.preheader
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: sub w19, w8, #1
; CHECK-NEXT: .LBB8_2: // %while.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: bl do_something
; CHECK-NEXT: adds w19, w19, #1
; CHECK-NEXT: b.mi .LBB8_2
; CHECK-NEXT: .LBB8_3: // %while.end
; CHECK-NEXT: adrp x8, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:c]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmn w8, #2
; CHECK-NEXT: b.lt .LBB8_6
; CHECK-NEXT: // %bb.4: // %land.lhs.true
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .LBB8_4: // %while.end
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: adrp x10, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr x10, [x10, :got_lo12:c]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: ldr w10, [x10]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB8_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #123 // =0x7b
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB8_6: // %if.end
; CHECK-NEXT: .cfi_restore_state
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: mov w8, #-3 // =0xfffffffd
; CHECK-NEXT: ccmp w10, w8, #4, eq
; CHECK-NEXT: mov w8, #123 // =0x7b
; CHECK-NEXT: csel w0, w8, wzr, gt
; CHECK-NEXT: ret
entry:
%0 = load i32, ptr @a, align 4
Expand Down Expand Up @@ -782,37 +763,34 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: csel x9, x0, xzr, gt
; CHECK-NEXT: str x9, [x1]
; CHECK-NEXT: b.le .LBB11_2
; CHECK-NEXT: b.le .LBB11_3
; CHECK-NEXT: // %bb.1: // %lor.lhs.false
; CHECK-NEXT: cmp w8, #2
; CHECK-NEXT: b.ge .LBB11_4
; CHECK-NEXT: b .LBB11_6
; CHECK-NEXT: .LBB11_2: // %land.lhs.true
; CHECK-NEXT: b.ge .LBB11_5
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB11_3: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB11_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: b.ne .LBB11_5
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB11_4: // %land.lhs.true3
; CHECK-NEXT: .LBB11_5: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB11_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB11_6: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
entry:
%0 = load i32, ptr @a, align 4
Expand Down
18 changes: 5 additions & 13 deletions llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@ define i32 @fred(ptr %a0) #0 {
; CHECK-LABEL: fred:
; CHECK: // %bb.0: // %b0
; CHECK-NEXT: {
; CHECK-NEXT: if (p0) jump:nt .LBB0_2
; CHECK-NEXT: r1:0 = combine(r0,#0)
; CHECK-NEXT: if (p0) jumpr r31
; CHECK-NEXT: }
; CHECK-NEXT: // %bb.1: // %b2
; CHECK-NEXT: .LBB0_1: // %b2
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = combine(#0,#0)
; CHECK-NEXT: r1:0 = memd(r0+#0)
; CHECK-NEXT: r1:0 = memd(r1+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = vcmph.eq(r1:0,r3:2)
Expand All @@ -27,16 +28,7 @@ define i32 @fred(ptr %a0) #0 {
; CHECK-NEXT: r0 = and(r0,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = cmp.eq(r0,#11)
; CHECK-NEXT: r0 = #1
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: if (p0) r0 = #0
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
; CHECK-NEXT: .LBB0_2: // %b14
; CHECK-NEXT: {
; CHECK-NEXT: r0 = #0
; CHECK-NEXT: r0 = !cmp.eq(r0,#11)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
b0:
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/JumpThreading/pr79175.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ define i32 @test(i64 %idx, i32 %val) {
; CHECK: cond.end:
; CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[VAL]], 0
; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP_I]]
; CHECK-NEXT: br i1 [[COND_FR]], label [[COND_END_THREAD]], label [[TMP0:%.*]]
; CHECK: cond.end.thread:
; CHECK-NEXT: br label [[TMP0]]
; CHECK-NEXT: br i1 [[COND_FR]], label [[TMP0:%.*]], label [[COND_END_THREAD]]
; CHECK: 0:
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 0, [[COND_END_THREAD]] ], [ [[VAL]], [[COND_END]] ]
; CHECK-NEXT: br label [[COND_END_THREAD]]
; CHECK: cond.end.thread:
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[VAL]], [[COND_END]] ], [ 0, [[TMP0]] ], [ 0, [[FOR_BODY]] ]
; CHECK-NEXT: [[F_IDX:%.*]] = getelementptr inbounds i32, ptr @f, i64 [[IDX]]
; CHECK-NEXT: store i32 [[TMP1]], ptr [[F_IDX]], align 4
; CHECK-NEXT: [[F_RELOAD:%.*]] = load i32, ptr @f, align 4
Expand Down
Loading

0 comments on commit 36b3c26

Please sign in to comment.