diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 0b875ed6881014..0b3682353f736e 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -908,6 +908,10 @@ class BinaryFunction { return BB && BB->getOffset() == Offset ? BB : nullptr; } + const BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) const { + return const_cast(this)->getBasicBlockAtOffset(Offset); + } + /// Retrieve the landing pad BB associated with invoke instruction \p Invoke /// that is in \p BB. Return nullptr if none exists BinaryBasicBlock *getLandingPadBBFor(const BinaryBasicBlock &BB, diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 6453b3070ceb8d..2880bfd03be789 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -266,7 +266,8 @@ class DataAggregator : public DataReader { uint64_t Mispreds); /// Register a \p Branch. - bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds); + bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds, + bool IsPreagg); /// Register a trace between two LBR entries supplied in execution order. bool doTrace(const LBREntry &First, const LBREntry &Second, diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index ffd693f9bbaed4..697cac9fbcaa08 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -778,42 +778,75 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc, } bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count, - uint64_t Mispreds) { - bool IsReturn = false; - auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * { - if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) { - Addr -= Func->getAddress(); - if (IsFrom) { - auto checkReturn = [&](auto MaybeInst) { - IsReturn = MaybeInst && BC->MIB->isReturn(*MaybeInst); - }; - if (Func->hasInstructions()) - checkReturn(Func->getInstructionAtOffset(Addr)); - else - checkReturn(Func->disassembleInstructionAtOffset(Addr)); - } + uint64_t Mispreds, bool IsPreagg) { + // Returns whether \p Offset in \p Func contains a return instruction. + auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) { + auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); }; + return Func.hasInstructions() + ? isReturn(Func.getInstructionAtOffset(Offset)) + : isReturn(Func.disassembleInstructionAtOffset(Offset)); + }; - if (BAT) - Addr = BAT->translate(Func->getAddress(), Addr, IsFrom); + // Returns whether \p Offset in \p Func may be a call continuation excluding + // entry points and landing pads. + auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) { + // No call continuation at a function start. + if (!Offset) + return false; + + // FIXME: support BAT case where the function might be in empty state + // (split fragments declared non-simple). + if (!Func.hasCFG()) + return false; + + // The offset should not be an entry point or a landing pad. + const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset); + return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad(); + }; - if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) { - Func = ParentFunc; - if (IsFrom) - NumColdSamples += Count; - } + // Mutates \p Addr to an offset into the containing function, performing BAT + // offset translation and parent lookup. + // + // Returns the containing function (or BAT parent) and whether the address + // corresponds to a return (if \p IsFrom) or a call continuation (otherwise). + auto handleAddress = [&](uint64_t &Addr, bool IsFrom) { + BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr); + if (!Func) + return std::pair{Func, false}; - return Func; - } - return nullptr; + Addr -= Func->getAddress(); + + bool IsRetOrCallCont = + IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr); + + if (BAT) + Addr = BAT->translate(Func->getAddress(), Addr, IsFrom); + + BinaryFunction *ParentFunc = getBATParentFunction(*Func); + if (!ParentFunc) + return std::pair{Func, IsRetOrCallCont}; + + if (IsFrom) + NumColdSamples += Count; + + return std::pair{ParentFunc, IsRetOrCallCont}; }; - BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true); + uint64_t ToOrig = To; + auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom*/ true); + auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom*/ false); + if (!FromFunc && !ToFunc) + return false; + + // Record call to continuation trace. + if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) { + LBREntry First{ToOrig - 1, ToOrig - 1, false}; + LBREntry Second{ToOrig, ToOrig, false}; + return doTrace(First, Second, Count); + } // Ignore returns. if (IsReturn) return true; - BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false); - if (!FromFunc && !ToFunc) - return false; // Treat recursive control transfers as inter-branches. if (FromFunc == ToFunc && To != 0) { @@ -830,10 +863,19 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second, BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From); if (!FromFunc || !ToFunc) { LLVM_DEBUG({ - dbgs() << "Out of range trace starting in " << FromFunc->getPrintName() - << formatv(" @ {0:x}", First.To - FromFunc->getAddress()) - << " and ending in " << ToFunc->getPrintName() - << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress()); + dbgs() << "Out of range trace starting in "; + if (FromFunc) + dbgs() << formatv("{0} @ {1:x}", *FromFunc, + First.To - FromFunc->getAddress()); + else + dbgs() << Twine::utohexstr(First.To); + dbgs() << " and ending in "; + if (ToFunc) + dbgs() << formatv("{0} @ {1:x}", *ToFunc, + Second.From - ToFunc->getAddress()); + else + dbgs() << Twine::utohexstr(Second.From); + dbgs() << '\n'; }); NumLongRangeTraces += Count; return false; @@ -1620,7 +1662,8 @@ void DataAggregator::processBranchEvents() { for (const auto &AggrLBR : BranchLBRs) { const Trace &Loc = AggrLBR.first; const TakenBranchInfo &Info = AggrLBR.second; - doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount); + doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount, + /*IsPreagg*/ false); } } @@ -1781,7 +1824,7 @@ void DataAggregator::processPreAggregated() { switch (AggrEntry.EntryType) { case AggregatedLBREntry::BRANCH: doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count, - AggrEntry.Mispreds); + AggrEntry.Mispreds, /*IsPreagg*/ true); break; case AggregatedLBREntry::FT: case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: { diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s new file mode 100644 index 00000000000000..31a7910d7fa3f4 --- /dev/null +++ b/bolt/test/X86/callcont-fallthru.s @@ -0,0 +1,132 @@ +## Ensures that a call continuation fallthrough count is set when using +## pre-aggregated perf data. + +# RUN: %clangxx %cxxflags %s -o %t -Wl,-q -nostdlib +# RUN: link_fdata %s %t %t.pa1 PREAGG +# RUN: link_fdata %s %t %t.pa2 PREAGG2 +# RUN: link_fdata %s %t %t.pa3 PREAGG3 +# RUN: link_fdata %s %t %t.pa4 PREAGG4 + +## Check normal case: fallthrough is not LP or secondary entry. +# RUN: llvm-strip --strip-unneeded %t -o %t.exe +# RUN: llvm-bolt %t.exe --pa -p %t.pa1 -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s + +## Check that getFallthroughsInTrace correctly handles a trace starting at plt +## call continuation +# RUN: llvm-bolt %t.exe --pa -p %t.pa2 -o %t.out2 \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2 + +## Check that we don't treat secondary entry points as call continuation sites. +# RUN: llvm-bolt %t --pa -p %t.pa3 -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3 + +## Check fallthrough to a landing pad case. +# RUN: llvm-bolt %t.exe --pa -p %t.pa4 -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK4 + + .globl foo + .type foo, %function +foo: + pushq %rbp + movq %rsp, %rbp + popq %rbp +Lfoo_ret: + retq +.size foo, .-foo + + .globl main + .type main, %function +main: +.Lfunc_begin0: + .cfi_startproc + .cfi_personality 155, DW.ref.__gxx_personality_v0 + .cfi_lsda 27, .Lexception0 + pushq %rbp + movq %rsp, %rbp + subq $0x20, %rsp + movl $0x0, -0x4(%rbp) + movl %edi, -0x8(%rbp) + movq %rsi, -0x10(%rbp) + callq puts@PLT +## Target is a call continuation +# PREAGG: B X:0 #Ltmp1# 2 0 +# CHECK: callq puts@PLT +# CHECK-NEXT: count: 2 + +Ltmp1: + movq -0x10(%rbp), %rax + movq 0x8(%rax), %rdi + movl %eax, -0x14(%rbp) + +Ltmp4: + cmpl $0x0, -0x14(%rbp) + je Ltmp0 +# CHECK2: je .Ltmp0 +# CHECK2-NEXT: count: 3 + + movl $0xa, -0x18(%rbp) + callq foo +## Target is a call continuation +# PREAGG: B #Lfoo_ret# #Ltmp3# 1 0 +# CHECK: callq foo +# CHECK-NEXT: count: 1 + +## PLT call continuation fallthrough spanning the call +# PREAGG2: F #Ltmp1# #Ltmp3_br# 3 +# CHECK2: callq foo +# CHECK2-NEXT: count: 3 + +## Target is a secondary entry point +# PREAGG3: B X:0 #Ltmp3# 2 0 +# CHECK3: callq foo +# CHECK3-NEXT: count: 0 + +## Target is a landing pad +# PREAGG4: B X:0 #Ltmp3# 2 0 +# CHECK4: callq puts@PLT +# CHECK4-NEXT: count: 0 + +Ltmp3: + cmpl $0x0, -0x18(%rbp) +Ltmp3_br: + jmp Ltmp2 + +Ltmp2: + movl -0x18(%rbp), %eax + addl $-0x1, %eax + movl %eax, -0x18(%rbp) + jmp Ltmp3 + jmp Ltmp4 + jmp Ltmp1 + +Ltmp0: + xorl %eax, %eax + addq $0x20, %rsp + popq %rbp + retq +.Lfunc_end0: + .cfi_endproc +.size main, .-main + + .section .gcc_except_table,"a",@progbits + .p2align 2, 0x0 +GCC_except_table0: +.Lexception0: + .byte 255 # @LPStart Encoding = omit + .byte 255 # @TType Encoding = omit + .byte 1 # Call site Encoding = uleb128 + .uleb128 .Lcst_end0-.Lcst_begin0 +.Lcst_begin0: + .uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 << + .uleb128 .Lfunc_end0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Lfunc_end0 + .uleb128 Ltmp3-.Lfunc_begin0 # jumps to Ltmp3 + .byte 0 # has no landing pad + .byte 0 # On action: cleanup +.Lcst_end0: + .p2align 2, 0x0 + .hidden DW.ref.__gxx_personality_v0 + .weak DW.ref.__gxx_personality_v0 + .section .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat + .p2align 3, 0x0 + .type DW.ref.__gxx_personality_v0,@object