[𝘀𝗽𝗿] changes to main this commit is based on

Created using spr 1.3.4

[skip ci]
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 6ebbaf9..3ce3be0 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -906,6 +906,10 @@
     return BB && BB->getOffset() == Offset ? BB : nullptr;
   }
 
+  const BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) const {
+    return const_cast<BinaryFunction *>(this)->getBasicBlockAtOffset(Offset);
+  }
+
   /// Retrieve the landing pad BB associated with invoke instruction \p Invoke
   /// that is in \p BB. Return nullptr if none exists
   BinaryBasicBlock *getLandingPadBBFor(const BinaryBasicBlock &BB,
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6453b30..2880bfd 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -266,7 +266,8 @@
                      uint64_t Mispreds);
 
   /// Register a \p Branch.
-  bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds);
+  bool doBranch(uint64_t From, uint64_t To, uint64_t Count, uint64_t Mispreds,
+                bool IsPreagg);
 
   /// Register a trace between two LBR entries supplied in execution order.
   bool doTrace(const LBREntry &First, const LBREntry &Second,
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index fcde6f5..6e6f528 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -774,42 +774,75 @@
 }
 
 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
-                              uint64_t Mispreds) {
-  bool IsReturn = false;
-  auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
-    if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
-      Addr -= Func->getAddress();
-      if (IsFrom) {
-        auto checkReturn = [&](auto MaybeInst) {
-          IsReturn = MaybeInst && BC->MIB->isReturn(*MaybeInst);
-        };
-        if (Func->hasInstructions())
-          checkReturn(Func->getInstructionAtOffset(Addr));
-        else
-          checkReturn(Func->disassembleInstructionAtOffset(Addr));
-      }
-
-      if (BAT)
-        Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
-
-      if (BinaryFunction *ParentFunc = getBATParentFunction(*Func)) {
-        Func = ParentFunc;
-        if (IsFrom)
-          NumColdSamples += Count;
-      }
-
-      return Func;
-    }
-    return nullptr;
+                              uint64_t Mispreds, bool IsPreagg) {
+  // Returns whether \p Offset in \p Func contains a return instruction.
+  auto checkReturn = [&](const BinaryFunction &Func, const uint64_t Offset) {
+    auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
+    return Func.hasInstructions()
+               ? isReturn(Func.getInstructionAtOffset(Offset))
+               : isReturn(Func.disassembleInstructionAtOffset(Offset));
   };
 
-  BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
+  // Returns whether \p Offset in \p Func may be a call continuation excluding
+  // entry points and landing pads.
+  auto checkCallCont = [&](const BinaryFunction &Func, const uint64_t Offset) {
+    // No call continuation at a function start.
+    if (!Offset)
+      return false;
+
+    // FIXME: support BAT case where the function might be in empty state
+    // (split fragments declared non-simple).
+    if (!Func.hasCFG())
+      return false;
+
+    // The offset should not be an entry point or a landing pad.
+    const BinaryBasicBlock *ContBB = Func.getBasicBlockAtOffset(Offset);
+    return ContBB && !ContBB->isEntryPoint() && !ContBB->isLandingPad();
+  };
+
+  // Mutates \p Addr to an offset into the containing function, performing BAT
+  // offset translation and parent lookup.
+  //
+  // Returns the containing function (or BAT parent) and whether the address
+  // corresponds to a return (if \p IsFrom) or a call continuation (otherwise).
+  auto handleAddress = [&](uint64_t &Addr, bool IsFrom) {
+    BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
+    if (!Func)
+      return std::pair{Func, false};
+
+    Addr -= Func->getAddress();
+
+    bool IsRetOrCallCont =
+        IsFrom ? checkReturn(*Func, Addr) : checkCallCont(*Func, Addr);
+
+    if (BAT)
+      Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
+
+    BinaryFunction *ParentFunc = getBATParentFunction(*Func);
+    if (!ParentFunc)
+      return std::pair{Func, IsRetOrCallCont};
+
+    if (IsFrom)
+      NumColdSamples += Count;
+
+    return std::pair{ParentFunc, IsRetOrCallCont};
+  };
+
+  uint64_t ToOrig = To;
+  auto [FromFunc, IsReturn] = handleAddress(From, /*IsFrom=*/true);
+  auto [ToFunc, IsCallCont] = handleAddress(To, /*IsFrom=*/false);
+  if (!FromFunc && !ToFunc)
+    return false;
+
+  // Record call to continuation trace.
+  if (IsPreagg && FromFunc != ToFunc && (IsReturn || IsCallCont)) {
+    LBREntry First{ToOrig - 1, ToOrig - 1, false};
+    LBREntry Second{ToOrig, ToOrig, false};
+    return doTrace(First, Second, Count);
+  }
   // Ignore returns.
   if (IsReturn)
     return true;
-  BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
-  if (!FromFunc && !ToFunc)
-    return false;
 
   // Treat recursive control transfers as inter-branches.
   if (FromFunc == ToFunc && To != 0) {
@@ -826,10 +859,19 @@
   BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(Second.From);
   if (!FromFunc || !ToFunc) {
     LLVM_DEBUG({
-      dbgs() << "Out of range trace starting in " << FromFunc->getPrintName()
-             << formatv(" @ {0:x}", First.To - FromFunc->getAddress())
-             << " and ending in " << ToFunc->getPrintName()
-             << formatv(" @ {0:x}\n", Second.From - ToFunc->getAddress());
+      dbgs() << "Out of range trace starting in ";
+      if (FromFunc)
+        dbgs() << formatv("{0} @ {1:x}", *FromFunc,
+                          First.To - FromFunc->getAddress());
+      else
+        dbgs() << Twine::utohexstr(First.To);
+      dbgs() << " and ending in ";
+      if (ToFunc)
+        dbgs() << formatv("{0} @ {1:x}", *ToFunc,
+                          Second.From - ToFunc->getAddress());
+      else
+        dbgs() << Twine::utohexstr(Second.From);
+      dbgs() << '\n';
     });
     NumLongRangeTraces += Count;
     return false;
@@ -1611,7 +1653,8 @@
   for (const auto &AggrLBR : BranchLBRs) {
     const Trace &Loc = AggrLBR.first;
     const TakenBranchInfo &Info = AggrLBR.second;
-    doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
+    doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount,
+             /*IsPreagg*/ false);
   }
 }
 
@@ -1772,7 +1815,7 @@
     switch (AggrEntry.EntryType) {
     case AggregatedLBREntry::BRANCH:
       doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
-               AggrEntry.Mispreds);
+               AggrEntry.Mispreds, /*IsPreagg=*/true);
       break;
     case AggregatedLBREntry::FT:
     case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s
new file mode 100644
index 0000000..31a7910
--- /dev/null
+++ b/bolt/test/X86/callcont-fallthru.s
@@ -0,0 +1,132 @@
+## Ensures that a call continuation fallthrough count is set when using
+## pre-aggregated perf data.
+
+# RUN: %clangxx %cxxflags %s -o %t -Wl,-q -nostdlib
+# RUN: link_fdata %s %t %t.pa1 PREAGG
+# RUN: link_fdata %s %t %t.pa2 PREAGG2
+# RUN: link_fdata %s %t %t.pa3 PREAGG3
+# RUN: link_fdata %s %t %t.pa4 PREAGG4
+
+## Check normal case: fallthrough is not LP or secondary entry.
+# RUN: llvm-strip --strip-unneeded %t -o %t.exe
+# RUN: llvm-bolt %t.exe --pa -p %t.pa1 -o %t.out \
+# RUN:   --print-cfg --print-only=main | FileCheck %s
+
+## Check that getFallthroughsInTrace correctly handles a trace starting at plt
+## call continuation
+# RUN: llvm-bolt %t.exe --pa -p %t.pa2 -o %t.out2 \
+# RUN:   --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK2
+
+## Check that we don't treat secondary entry points as call continuation sites.
+# RUN: llvm-bolt %t --pa -p %t.pa3 -o %t.out \
+# RUN:   --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK3
+
+## Check fallthrough to a landing pad case.
+# RUN: llvm-bolt %t.exe --pa -p %t.pa4 -o %t.out \
+# RUN:   --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK4
+
+  .globl foo
+  .type foo, %function
+foo:
+	pushq	%rbp
+	movq	%rsp, %rbp
+	popq	%rbp
+Lfoo_ret:
+	retq
+.size foo, .-foo
+
+  .globl main
+  .type main, %function
+main:
+.Lfunc_begin0:
+	.cfi_startproc
+	.cfi_personality 155, DW.ref.__gxx_personality_v0
+	.cfi_lsda 27, .Lexception0
+	pushq	%rbp
+	movq	%rsp, %rbp
+	subq	$0x20, %rsp
+	movl	$0x0, -0x4(%rbp)
+	movl	%edi, -0x8(%rbp)
+	movq	%rsi, -0x10(%rbp)
+	callq	puts@PLT
+## Target is a call continuation
+# PREAGG: B X:0 #Ltmp1# 2 0
+# CHECK:      callq puts@PLT
+# CHECK-NEXT: count: 2
+
+Ltmp1:
+	movq	-0x10(%rbp), %rax
+	movq	0x8(%rax), %rdi
+	movl	%eax, -0x14(%rbp)
+
+Ltmp4:
+	cmpl	$0x0, -0x14(%rbp)
+	je	Ltmp0
+# CHECK2:      je .Ltmp0
+# CHECK2-NEXT: count: 3
+
+	movl	$0xa, -0x18(%rbp)
+	callq	foo
+## Target is a call continuation
+# PREAGG: B #Lfoo_ret# #Ltmp3# 1 0
+# CHECK:      callq foo
+# CHECK-NEXT: count: 1
+
+## PLT call continuation fallthrough spanning the call
+# PREAGG2: F #Ltmp1# #Ltmp3_br# 3
+# CHECK2:      callq foo
+# CHECK2-NEXT: count: 3
+
+## Target is a secondary entry point
+# PREAGG3: B X:0 #Ltmp3# 2 0
+# CHECK3:      callq foo
+# CHECK3-NEXT: count: 0
+
+## Target is a landing pad
+# PREAGG4: B X:0 #Ltmp3# 2 0
+# CHECK4:      callq puts@PLT
+# CHECK4-NEXT: count: 0
+
+Ltmp3:
+	cmpl	$0x0, -0x18(%rbp)
+Ltmp3_br:
+	jmp	Ltmp2
+
+Ltmp2:
+	movl	-0x18(%rbp), %eax
+	addl	$-0x1, %eax
+	movl	%eax, -0x18(%rbp)
+	jmp	Ltmp3
+	jmp	Ltmp4
+	jmp	Ltmp1
+
+Ltmp0:
+	xorl	%eax, %eax
+	addq	$0x20, %rsp
+	popq	%rbp
+	retq
+.Lfunc_end0:
+  .cfi_endproc
+.size main, .-main
+
+	.section	.gcc_except_table,"a",@progbits
+	.p2align	2, 0x0
+GCC_except_table0:
+.Lexception0:
+	.byte	255                             # @LPStart Encoding = omit
+	.byte	255                             # @TType Encoding = omit
+	.byte	1                               # Call site Encoding = uleb128
+	.uleb128 .Lcst_end0-.Lcst_begin0
+.Lcst_begin0:
+	.uleb128 .Lfunc_begin0-.Lfunc_begin0    # >> Call Site 1 <<
+	.uleb128 .Lfunc_end0-.Lfunc_begin0           #   Call between .Lfunc_begin0 and .Lfunc_end0
+	.uleb128 Ltmp3-.Lfunc_begin0           #     jumps to Ltmp3
+	.byte	0                               #     has no landing pad
+	.byte	0                               #   On action: cleanup
+.Lcst_end0:
+	.p2align	2, 0x0
+	.hidden	DW.ref.__gxx_personality_v0
+	.weak	DW.ref.__gxx_personality_v0
+	.section	.data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
+	.p2align	3, 0x0
+	.type	DW.ref.__gxx_personality_v0,@object