diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 5ed67bd0a121ed..f5dd21cb927012 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -68,6 +68,18 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
              "end with indirect branches."),
     cl::init(20), cl::Hidden);
 
+static cl::opt<unsigned>
+    TailDupPredSize("tail-dup-pred-size",
+                    cl::desc("Maximum predecessors (maximum successors at the "
+                             "same time) to consider tail duplicating blocks."),
+                    cl::init(16), cl::Hidden);
+
+static cl::opt<unsigned>
+    TailDupSuccSize("tail-dup-succ-size",
+                    cl::desc("Maximum successors (maximum predecessors at the "
+                             "same time) to consider tail duplicating blocks."),
+                    cl::init(16), cl::Hidden);
+
 static cl::opt<bool>
     TailDupVerify("tail-dup-verify",
                   cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -565,6 +577,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   if (TailBB.isSuccessor(&TailBB))
     return false;
 
+  // Duplicating a BB that has both many predecessors and many successors
+  // produces a complex CFG and may create a huge number of PHI nodes. Skip it
+  // only when both counts exceed their limits. Lifting this restriction
+  // requires addressing https://github.com/llvm/llvm-project/issues/78578.
+  if (TailBB.pred_size() > TailDupPredSize &&
+      TailBB.succ_size() > TailDupSuccSize)
+    return false;
+
   // Set the limit on the cost to duplicate. When optimizing for size,
   // duplicate only one, because one branch instruction can be eliminated to
   // compensate for the duplication.
diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
new file mode 100644
index 00000000000000..67f8cc72e0d726
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
@@ -0,0 +1,260 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=3 -tail-dup-succ-size=3 %s -o - | FileCheck %s -check-prefix=LIMIT
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=4 -tail-dup-succ-size=4 %s -o - | FileCheck %s -check-prefix=NOLIMIT
+# bb.7 ends up with four predecessors and four successors: with both limits set to 3 it must not be tail-duplicated (LIMIT); with both set to 4 it is duplicated into bb.2-bb.5 (NOLIMIT).
+---
+name: foo
+tracksRegLiveness: true
+jumpTable:
+  kind: block-address
+  entries:
+    - id: 0
+      blocks: [ '%bb.2', '%bb.3', '%bb.4', '%bb.5' ]
+    - id: 1
+      blocks: [ '%bb.9', '%bb.10', '%bb.11', '%bb.12' ]
+body: |
+  ; LIMIT-LABEL: name: foo
+  ; LIMIT: bb.0:
+  ; LIMIT-NEXT:   successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000)
+  ; LIMIT-NEXT:   liveins: $rdi, $esi
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT:   [[COPY:%[0-9]+]]:gr32 = COPY $esi
+  ; LIMIT-NEXT:   [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
+  ; LIMIT-NEXT:   [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.2:
+  ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT:   [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; LIMIT-NEXT:   JMP_1 %bb.7
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.3:
+  ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT:   [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; LIMIT-NEXT:   [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.7
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.4:
+  ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT:   [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; LIMIT-NEXT:   [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.7
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.5:
+  ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT:   [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; LIMIT-NEXT:   [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.7
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.6:
+  ; LIMIT-NEXT:   successors:
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.7:
+  ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT:   [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri3]], %bb.5, [[SHR32ri2]], %bb.4, [[SHR32ri1]], %bb.3, [[MOV32rm]], %bb.2
+  ; LIMIT-NEXT:   [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.9:
+  ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT:   [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; LIMIT-NEXT:   JMP_1 %bb.13
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.10:
+  ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT:   [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; LIMIT-NEXT:   [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.13
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.11:
+  ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT:   [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; LIMIT-NEXT:   [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.13
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.12:
+  ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT:   [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; LIMIT-NEXT:   [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
+  ; LIMIT-NEXT: {{ $}}
+  ; LIMIT-NEXT: bb.13:
+  ; LIMIT-NEXT:   [[PHI1:%[0-9]+]]:gr32 = PHI [[SHR32ri7]], %bb.12, [[SHR32ri6]], %bb.11, [[SHR32ri5]], %bb.10, [[MOV32rm4]], %bb.9
+  ; LIMIT-NEXT:   [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI1]], [[PHI]], implicit-def dead $eflags
+  ; LIMIT-NEXT:   $eax = COPY [[OR32rr]]
+  ; LIMIT-NEXT:   RET 0, $eax
+  ;
+  ; NOLIMIT-LABEL: name: foo
+  ; NOLIMIT: bb.0:
+  ; NOLIMIT-NEXT:   successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000)
+  ; NOLIMIT-NEXT:   liveins: $rdi, $esi
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT:   [[COPY:%[0-9]+]]:gr32 = COPY $esi
+  ; NOLIMIT-NEXT:   [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
+  ; NOLIMIT-NEXT:   [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT: bb.2:
+  ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT:   [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; NOLIMIT-NEXT:   [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT: bb.3:
+  ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT:   [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; NOLIMIT-NEXT:   [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT: bb.4:
+  ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT:   [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; NOLIMIT-NEXT:   [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT: bb.5:
+  ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT:   [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; NOLIMIT-NEXT:   [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT: bb.6:
+  ; NOLIMIT-NEXT:   successors:
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT: bb.9:
+  ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT:   [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; NOLIMIT-NEXT:   [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; NOLIMIT-NEXT:   JMP_1 %bb.13
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT: bb.10:
+  ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT:   [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; NOLIMIT-NEXT:   [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; NOLIMIT-NEXT:   [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   JMP_1 %bb.13
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT: bb.11:
+  ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT:   [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; NOLIMIT-NEXT:   [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; NOLIMIT-NEXT:   [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   JMP_1 %bb.13
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT: bb.12:
+  ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT:   [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; NOLIMIT-NEXT:   [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
+  ; NOLIMIT-NEXT:   [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
+  ; NOLIMIT-NEXT: {{ $}}
+  ; NOLIMIT-NEXT: bb.13:
+  ; NOLIMIT-NEXT:   [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12
+  ; NOLIMIT-NEXT:   [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9
+  ; NOLIMIT-NEXT:   [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   $eax = COPY [[OR32rr]]
+  ; NOLIMIT-NEXT:   RET 0, $eax
+  bb.0:
+    liveins: $rdi, $esi
+
+    %11:gr32 = COPY $esi
+    %10:gr64 = COPY $rdi
+    %13:gr32 = SHR32ri %11, 1, implicit-def dead $eflags
+    %14:gr32 = AND32ri %13, 7, implicit-def dead $eflags
+    %12:gr64_nosp = SUBREG_TO_REG 0, killed %14, %subreg.sub_32bit
+
+  bb.1:
+    successors: %bb.2, %bb.3, %bb.4, %bb.5
+
+    JMP64m $noreg, 8, %12, %jump-table.0, $noreg
+
+  bb.2:
+    %0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
+    JMP_1 %bb.7
+
+  bb.3:
+    %17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
+    %1:gr32 = SHR32ri %17, 1, implicit-def dead $eflags
+    JMP_1 %bb.7
+
+  bb.4:
+    %16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
+    %2:gr32 = SHR32ri %16, 2, implicit-def dead $eflags
+    JMP_1 %bb.7
+
+  bb.5:
+    %15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
+    %3:gr32 = SHR32ri %15, 3, implicit-def dead $eflags
+    JMP_1 %bb.7
+
+  bb.6:
+    successors:
+
+  bb.7:
+    %4:gr32 = PHI %3, %bb.5, %2, %bb.4, %1, %bb.3, %0, %bb.2
+    %19:gr32 = SHR32ri %11, 2, implicit-def dead $eflags
+    %20:gr32 = AND32ri %19, 7, implicit-def dead $eflags
+    %18:gr64_nosp = SUBREG_TO_REG 0, killed %20, %subreg.sub_32bit
+
+  bb.8:
+    successors: %bb.9, %bb.10, %bb.11, %bb.12
+
+    JMP64m $noreg, 8, %18, %jump-table.1, $noreg
+
+  bb.9:
+    %5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
+    JMP_1 %bb.13
+
+  bb.10:
+    %23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
+    %6:gr32 = SHR32ri %23, 1, implicit-def dead $eflags
+    JMP_1 %bb.13
+
+  bb.11:
+    %22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
+    %7:gr32 = SHR32ri %22, 2, implicit-def dead $eflags
+    JMP_1 %bb.13
+
+  bb.12:
+    %21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
+    %8:gr32 = SHR32ri %21, 6, implicit-def dead $eflags
+
+  bb.13:
+    %9:gr32 = PHI %8, %bb.12, %7, %bb.11, %6, %bb.10, %5, %bb.9
+    %24:gr32 = OR32rr %9, %4, implicit-def dead $eflags
+    $eax = COPY %24
+    RET 0, $eax
+
+...