Skip to content

Commit

Permalink
[TailDuplicator] Add maximum predecessors and successors to consider …
Browse files Browse the repository at this point in the history
…tail duplicating blocks (#78582)

Fixes #78578.

Duplicating a BB which has both multiple predecessors and multiple successors
will result in a complex CFG and may also cause a huge number of PHI
nodes. See
#78578 (comment)
for a detailed description of the limit.
  • Loading branch information
DianQK authored Apr 17, 2024
1 parent 4536ad4 commit 86a7828
Show file tree
Hide file tree
Showing 2 changed files with 280 additions and 0 deletions.
20 changes: 20 additions & 0 deletions llvm/lib/CodeGen/TailDuplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,18 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);

// Predecessor-count threshold for tail duplication (hidden option, default 16).
// It is not an independent cap: the candidate block is rejected only when its
// pred_size() exceeds this value AND its succ_size() also exceeds
// TailDupSuccSize, i.e. both limits must be exceeded at the same time.
static cl::opt<unsigned>
TailDupPredSize("tail-dup-pred-size",
cl::desc("Maximum predecessors (maximum successors at the "
"same time) to consider tail duplicating blocks."),
cl::init(16), cl::Hidden);

// Successor-count threshold for tail duplication (hidden option, default 16).
// It is not an independent cap: the candidate block is rejected only when its
// succ_size() exceeds this value AND its pred_size() also exceeds
// TailDupPredSize, i.e. both limits must be exceeded at the same time.
static cl::opt<unsigned>
TailDupSuccSize("tail-dup-succ-size",
cl::desc("Maximum successors (maximum predecessors at the "
"same time) to consider tail duplicating blocks."),
cl::init(16), cl::Hidden);

static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
Expand Down Expand Up @@ -565,6 +577,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;

// Duplicating a BB which has both multiple predecessors and successors will
// result in a complex CFG and may also cause a huge number of PHI nodes. If
// we want to remove this limitation, we have to address
// https://github.com/llvm/llvm-project/issues/78578.
if (TailBB.pred_size() > TailDupPredSize &&
TailBB.succ_size() > TailDupSuccSize)
return false;

// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
Expand Down
260 changes: 260 additions & 0 deletions llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=3 -tail-dup-succ-size=3 %s -o - | FileCheck %s -check-prefix=LIMIT
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=4 -tail-dup-succ-size=4 %s -o - | FileCheck %s -check-prefix=NOLIMIT

---
name: foo
tracksRegLiveness: true
jumpTable:
kind: block-address
entries:
- id: 0
blocks: [ '%bb.2', '%bb.3', '%bb.4', '%bb.5' ]
- id: 1
blocks: [ '%bb.9', '%bb.10', '%bb.11', '%bb.12' ]
body: |
; LIMIT-LABEL: name: foo
; LIMIT: bb.0:
; LIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000)
; LIMIT-NEXT: liveins: $rdi, $esi
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi
; LIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
; LIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.2:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.3:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.4:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.5:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.6:
; LIMIT-NEXT: successors:
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.7:
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri3]], %bb.5, [[SHR32ri2]], %bb.4, [[SHR32ri1]], %bb.3, [[MOV32rm]], %bb.2
; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.9:
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: JMP_1 %bb.13
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.10:
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.13
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.11:
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.13
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.12:
; LIMIT-NEXT: successors: %bb.13(0x80000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.13:
; LIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[SHR32ri7]], %bb.12, [[SHR32ri6]], %bb.11, [[SHR32ri5]], %bb.10, [[MOV32rm4]], %bb.9
; LIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI1]], [[PHI]], implicit-def dead $eflags
; LIMIT-NEXT: $eax = COPY [[OR32rr]]
; LIMIT-NEXT: RET 0, $eax
;
; NOLIMIT-LABEL: name: foo
; NOLIMIT: bb.0:
; NOLIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000)
; NOLIMIT-NEXT: liveins: $rdi, $esi
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi
; NOLIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
; NOLIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
; NOLIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.2:
; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit
; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.3:
; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit
; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.4:
; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit
; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.5:
; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags
; NOLIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit
; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.6:
; NOLIMIT-NEXT: successors:
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.9:
; NOLIMIT-NEXT: successors: %bb.13(0x80000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; NOLIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: JMP_1 %bb.13
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.10:
; NOLIMIT-NEXT: successors: %bb.13(0x80000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; NOLIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
; NOLIMIT-NEXT: JMP_1 %bb.13
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.11:
; NOLIMIT-NEXT: successors: %bb.13(0x80000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; NOLIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
; NOLIMIT-NEXT: JMP_1 %bb.13
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.12:
; NOLIMIT-NEXT: successors: %bb.13(0x80000000)
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
; NOLIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; NOLIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
; NOLIMIT-NEXT: {{ $}}
; NOLIMIT-NEXT: bb.13:
; NOLIMIT-NEXT: [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12
; NOLIMIT-NEXT: [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9
; NOLIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags
; NOLIMIT-NEXT: $eax = COPY [[OR32rr]]
; NOLIMIT-NEXT: RET 0, $eax
bb.0:
liveins: $rdi, $esi
%11:gr32 = COPY $esi
%10:gr64 = COPY $rdi
%13:gr32 = SHR32ri %11, 1, implicit-def dead $eflags
%14:gr32 = AND32ri %13, 7, implicit-def dead $eflags
%12:gr64_nosp = SUBREG_TO_REG 0, killed %14, %subreg.sub_32bit
bb.1:
successors: %bb.2, %bb.3, %bb.4, %bb.5
JMP64m $noreg, 8, %12, %jump-table.0, $noreg
bb.2:
%0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
JMP_1 %bb.7
bb.3:
%17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%1:gr32 = SHR32ri %17, 1, implicit-def dead $eflags
JMP_1 %bb.7
bb.4:
%16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%2:gr32 = SHR32ri %16, 2, implicit-def dead $eflags
JMP_1 %bb.7
bb.5:
%15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%3:gr32 = SHR32ri %15, 3, implicit-def dead $eflags
JMP_1 %bb.7
bb.6:
successors:
bb.7:
%4:gr32 = PHI %3, %bb.5, %2, %bb.4, %1, %bb.3, %0, %bb.2
%19:gr32 = SHR32ri %11, 2, implicit-def dead $eflags
%20:gr32 = AND32ri %19, 7, implicit-def dead $eflags
%18:gr64_nosp = SUBREG_TO_REG 0, killed %20, %subreg.sub_32bit
bb.8:
successors: %bb.9, %bb.10, %bb.11, %bb.12
JMP64m $noreg, 8, %18, %jump-table.1, $noreg
bb.9:
%5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
JMP_1 %bb.13
bb.10:
%23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%6:gr32 = SHR32ri %23, 1, implicit-def dead $eflags
JMP_1 %bb.13
bb.11:
%22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%7:gr32 = SHR32ri %22, 2, implicit-def dead $eflags
JMP_1 %bb.13
bb.12:
%21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
%8:gr32 = SHR32ri %21, 6, implicit-def dead $eflags
bb.13:
%9:gr32 = PHI %8, %bb.12, %7, %bb.11, %6, %bb.10, %5, %bb.9
%24:gr32 = OR32rr %9, %4, implicit-def dead $eflags
$eax = COPY %24
RET 0, $eax
...

0 comments on commit 86a7828

Please sign in to comment.