summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormingmingl <mingmingl@google.com>2024-06-06 23:12:04 -0700
committermingmingl <mingmingl@google.com>2024-06-06 23:12:12 -0700
commit4f6b7ab1e5640a36316cffc3f879b6c6f4408d54 (patch)
treeb7fbec420bb1532abcabe4f7eaeb5965af635999
parent142845ce3830a43ad80a5ad81d4c6518c2eebd8e (diff)
1. Resolve review comments. 2. Handle vtable's PGO name, like what we do for indirect-call promotion. - InstrProf.h/cpp and PGOInstrumentation.cpp are modified. 3. Make use of 'MaxNumVTableAnnotations' in PGOInstrumentation.cpp
-rw-r--r--compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp34
-rw-r--r--llvm/include/llvm/ProfileData/InstrProf.h10
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp40
-rw-r--r--llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp301
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp30
-rw-r--r--llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll34
-rw-r--r--llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll10
-rw-r--r--llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll7
8 files changed, 279 insertions, 187 deletions
diff --git a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
index 19def403cca7..0a32034f182a 100644
--- a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
+++ b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
@@ -110,26 +110,40 @@
// ICTEXT: _ZTV8Derived1:250
// Test indirect call promotion transformation using vtable profiles.
-// Build with `-g` to enable debug information.
-// RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -g -flto=thin -fwhole-program-vtables -O2 -mllvm -enable-vtable-value-profiling -mllvm -icp-enable-vtable-cmp -Rpass=pgo-icall-prom -mllvm -print-after=pgo-icall-prom -mllvm -filter-print-funcs=main %s 2>&1 | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP"
+// - Build with `-g` to enable debug information.
+// - In real world settings, ICP pass is disabled in prelink pipeline. In
+// the postlink pipeline, ICP is enabled after whole-program-devirtualization
+// pass. Do the same thing in this test.
+// - Enable `-fwhole-program-vtables` to generate type metadata and intrinsics.
+// - Enable `-fno-split-lto-unit` and `-Wl,--lto-whole-program-visibility` to
+// preserve type intrinsics for ICP pass.
+// RUN: %clangxx -m64 -fprofile-use=test.profdata -Wl,--lto-whole-program-visibility \
+// RUN: -mllvm -disable-icp=true -Wl,-mllvm,-disable-icp=false -fuse-ld=lld \
+// RUN: -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \
+// RUN: -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \
+// RUN: -mllvm -enable-vtable-profile-use \
+// RUN: -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \
+// RUN: -Wl,-mllvm,-print-after=pgo-icall-prom \
+// RUN: -Wl,-mllvm,-filter-print-funcs=main %s 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP"
// For the indirect call site `ptr->func`
-// REMARK: instrprof-vtable-value-prof.cpp:191:19: remark: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, compare 1 vtables and sink 1 instructions
-// REMARK: instrprof-vtable-value-prof.cpp:191:19: remark: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, compare 1 vtables and sink 1 instructions
+// REMARK: instrprof-vtable-value-prof.cpp:205:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, compare 1 vtables and sink 1 instructions
+// REMARK: instrprof-vtable-value-prof.cpp:205:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, compare 1 vtables and sink 1 instructions
//
// For the indirect call site `delete ptr`
-// REMARK: instrprof-vtable-value-prof.cpp:193:5: remark: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, compare 1 vtables and sink 2 instructions
-// REMARK: instrprof-vtable-value-prof.cpp:193:5: remark: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, compare 1 vtables and sink 2 instructions
+// REMARK: instrprof-vtable-value-prof.cpp:207:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, compare 1 vtables and sink 2 instructions
+// REMARK: instrprof-vtable-value-prof.cpp:207:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, compare 1 vtables and sink 2 instructions
// The IR matchers for indirect callsite `ptr->func`.
// IR-LABEL: @main
-// IR: [[OBJ:%.*]] = call {{.*}} @_Z10createTypei
+// IR: [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei
// IR: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]]
// IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E, i32 16)
// IR: br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]],
//
// IR: [[BB1]]:
-// IR: [[RESBB1:%.*]] = call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii
+// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii
// IR: br label %[[MERGE0:[a-zA-Z0-9_.]+]]
//
// IR: [[BB2]]:
@@ -137,12 +151,12 @@
// IR: br i1 [[CMP2]], label %[[BB3:.*]], label %[[BB4:[a-zA-Z0-9_.]+]],
//
// IR: [[BB3]]:
-// IR: [[RESBB3:%.*]] = call {{.*}} @_ZN8Derived14funcEii
+// IR: [[RESBB3:%.*]] = {{.*}}call {{.*}} @_ZN8Derived14funcEii
// IR: br label %[[MERGE1:[a-zA-Z0-9_.]+]],
//
// IR: [[BB4]]:
// IR: [[FUNCPTR:%.*]] = load ptr, ptr [[VTABLE]]
-// IR: [[RESBB4:%.*]] = call {{.*}} [[FUNCPTR]]
+// IR: [[RESBB4:%.*]] = {{.*}}call {{.*}} [[FUNCPTR]]
// IR: br label %[[MERGE1]]
//
// IR: [[MERGE1]]:
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 88c7fe425b5a..817005bd28d8 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -304,8 +304,12 @@ getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind,
uint32_t MaxNumValueData, uint32_t &ActualNumValueData,
uint64_t &TotalC, bool GetNoICPValue = false);
+// TODO: Unify metadata name 'PGOFuncName' and 'PGOName', by supporting read
+// of this metadata for backward compatibility and generating 'PGOName' only.
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
+inline StringRef getPGONameMetadataName() { return "PGOName"; }
+
/// Return the PGOFuncName meta data associated with a function.
MDNode *getPGOFuncNameMetadata(const Function &F);
@@ -314,8 +318,14 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO = false);
/// Create the PGOFuncName meta data if PGOFuncName is different from
/// function's raw name. This should only apply to internal linkage functions
/// declared by users only.
+/// TODO: Update all callers to 'createPGONameMetadata' and deprecate this
+/// function.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
+/// Create the PGOName metadata if a global object's PGO name is different from
+/// its mangled name. This should apply to local-linkage global objects only.
+void createPGONameMetadata(GlobalObject &GO, StringRef PGOName);
+
/// Check if we can use Comdat for profile variables. This will eliminate
/// the duplicated profile variables for Comdat functions.
bool needsComdatForCounter(const GlobalObject &GV, const Module &M);
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 806d01de1ada..4649db2d92ec 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -228,6 +228,12 @@ cl::opt<bool> EnableVTableValueProfiling(
"the types of a C++ pointer. The information is used in indirect "
"call promotion to do selective vtable-based comparison."));
+cl::opt<bool> EnableVTableProfileUse(
+ "enable-vtable-profile-use", cl::init(false),
+ cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable "
+ "profiles will be used by ICP pass for more efficient indirect "
+ "call sequence. If false, type profiles won't be used."));
+
std::string getInstrProfSectionName(InstrProfSectKind IPSK,
Triple::ObjectFormatType OF,
bool AddSegmentInfo) {
@@ -391,7 +397,7 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO) {
// PGONameMetadata should be set by compiler at profile use time
// and read by symtab creation to look up symbols corresponding to
// a MD5 hash.
- return getIRPGOObjectName(V, InLTO, /*PGONameMetadata=*/nullptr);
+ return getIRPGOObjectName(V, InLTO, V.getMetadata(getPGONameMetadataName()));
}
// See getIRPGOObjectName() for a discription of the format.
@@ -480,8 +486,7 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
for (GlobalVariable &G : M.globals()) {
if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
continue;
- if (Error E = addVTableWithName(
- G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
+ if (Error E = addVTableWithName(G, getPGOName(G, InLTO)))
return E;
}
@@ -1393,16 +1398,27 @@ MDNode *getPGOFuncNameMetadata(const Function &F) {
return F.getMetadata(getPGOFuncNameMetadataName());
}
-void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
- // Only for internal linkage functions.
- if (PGOFuncName == F.getName())
- return;
- // Don't create duplicated meta-data.
- if (getPGOFuncNameMetadata(F))
+static void createPGONameMetadata(GlobalObject &GO, StringRef MetadataName,
+ StringRef PGOName) {
+ // The name of an internal linkage object is not the same as its PGO name.
+ if (GO.getName() == PGOName)
return;
- LLVMContext &C = F.getContext();
- MDNode *N = MDNode::get(C, MDString::get(C, PGOFuncName));
- F.setMetadata(getPGOFuncNameMetadataName(), N);
+
+ // Don't create duplicated metadata.
+ if (GO.getMetadata(MetadataName))
+ return;
+
+ LLVMContext &C = GO.getContext();
+ MDNode *N = MDNode::get(C, MDString::get(C, PGOName));
+ GO.setMetadata(MetadataName, N);
+}
+
+void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
+ return createPGONameMetadata(F, getPGOFuncNameMetadataName(), PGOFuncName);
+}
+
+void createPGONameMetadata(GlobalObject &GO, StringRef PGOName) {
+ return createPGONameMetadata(GO, getPGONameMetadataName(), PGOName);
}
bool needsComdatForCounter(const GlobalObject &GO, const Module &M) {
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 5384fa4e3794..c805cf3f22f3 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -58,6 +58,10 @@ STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
extern cl::opt<unsigned> MaxNumVTableAnnotations;
+namespace llvm {
+extern cl::opt<bool> EnableVTableProfileUse;
+}
+
// Command line option to disable indirect-call promotion with the default as
// false. This is for debug purpose.
static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
@@ -110,29 +114,31 @@ static cl::opt<bool>
ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
cl::desc("Dump IR after transformation happens"));
-// This option is meant to be used by LLVM regression test and test the
-// transformation that compares vtables.
-static cl::opt<bool> ICPEnableVTableCmp(
- "icp-enable-vtable-cmp", cl::init(false), cl::Hidden,
- cl::desc("If ThinLTO and WPD is enabled and this option is true, "
- "indirect-call promotion pass will compare vtables rather than "
- "functions for speculative devirtualization of virtual calls."
- " If set to false, indirect-call promotion pass will always "
- "compare functions."));
-
-static cl::opt<float>
- ICPVTableCountPercentage("icp-vtable-count-percentage", cl::init(0.99),
- cl::Hidden,
- cl::desc("Percentage of vtable count to compare"));
-
-static cl::opt<int> ICPNumAdditionalVTableLast(
- "icp-num-additional-vtable-last", cl::init(0), cl::Hidden,
- cl::desc("The number of additional instruction for the last candidate"));
+// Indirect call promotion pass will fall back to function-based comparison if
+// vtable-count / function-count is smaller than this threshold.
+static cl::opt<float> ICPVTablePercentageThreshold(
+ "icp-vtable-percentage-threshold", cl::init(0.99), cl::Hidden,
+ cl::desc("The percentage threshold of vtable-count / function-count for "
+ "cost-benefit analysis. "));
+
+// Although comparing vtables can save a vtable load, we may need to compare
+// vtable pointer with multiple vtable address points due to class inheritance.
+// Comparing with multiple vtables inserts additional instructions on hot code
+// path; and doing so for earlier candidate of one icall can affect later
+// function candidate in an undesired way. We allow multiple vtable comparison
+// for the last function candidate and use the option below to cap the number
+// of vtables.
+static cl::opt<int> ICPMaxNumVTableLastCandidate(
+ "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
+ cl::desc("The maximum number of vtable for the last candidate."));
namespace {
+// The key is a vtable global variable, and the value is a map.
+// In the inner map, the key represents address point offsets and the value is a
+// constant for this address point.
using VTableAddressPointOffsetValMap =
- SmallDenseMap<const GlobalVariable *, SmallDenseMap<int, Constant *, 4>, 8>;
+ SmallDenseMap<const GlobalVariable *, SmallDenseMap<int, Constant *>>;
// A struct to collect type information for a virtual call site.
struct VirtualCallSiteInfo {
@@ -146,19 +152,25 @@ struct VirtualCallSiteInfo {
// The key is a virtual call, and value is its type information.
using VirtualCallSiteTypeInfoMap =
- SmallDenseMap<const CallBase *, VirtualCallSiteInfo, 8>;
+ SmallDenseMap<const CallBase *, VirtualCallSiteInfo>;
+
+// The key is vtable GUID, and value is its value profile count.
+using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t>;
-// Find the offset where type string is `CompatibleType`.
+// Returns the address point offset of the given compatible type.
+//
+// Type metadata of a vtable specifies the types that can contain a pointer to
+// this vtable, for example, `Base*` can be a pointer to an instantiated type
+// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
static std::optional<uint64_t>
-getCompatibleTypeOffset(const GlobalVariable &VTableVar,
- StringRef CompatibleType) {
- SmallVector<MDNode *, 2> Types; // type metadata associated with a vtable.
+getAddressPointOffset(const GlobalVariable &VTableVar,
+ StringRef CompatibleType) {
+ SmallVector<MDNode *> Types;
VTableVar.getMetadata(LLVMContext::MD_type, Types);
for (MDNode *Type : Types)
if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
TypeId && TypeId->getString() == CompatibleType)
-
return cast<ConstantInt>(
cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
->getZExtValue();
@@ -181,7 +193,7 @@ static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
}
-// Returns the basic block in which `Inst` by `Use`.
+// Returns the basic block in which `Inst` is used via its `UserInst`.
static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
if (PHINode *PN = dyn_cast<PHINode>(UserInst))
return PN->getIncomingBlock(U);
@@ -199,7 +211,7 @@ static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
BasicBlock *BB = Inst->getParent();
assert(Inst->getParent() != DestBB &&
BB->getTerminator()->getNumSuccessors() == 2 &&
- "Caller should guarantee");
+ "Guaranteed by ICP transformation");
// Do not sink across a critical edge for simplicity.
if (DestBB->getUniquePredecessor() != BB)
return false;
@@ -225,18 +237,14 @@ static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
// For the virtual call dispatch sequence, try to sink vtable load instructions
// to the cold indirect call fallback.
+// FIXME: Move the sink eligibility check below to a utility function in
+// Transforms/Utils/ directory.
static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
- assert(!I->isTerminator());
if (!isDestBBSuitableForSink(I, DestBlock))
return false;
- assert(DestBlock->getUniquePredecessor() == I->getParent());
-
- // Do not move control-flow-involving, volatile loads, vaarg, etc.
- // Do not sink static or dynamic alloca instructions. Static allocas must
- // remain in the entry block, and dynamic allocas must not be sunk in between
- // a stacksave / stackrestore pair, which would incorrectly shorten its
- // lifetime.
+ // Do not move control-flow-involving, volatile loads, vaarg, alloca
+ // instructions, etc.
if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
isa<AllocaInst>(I))
return false;
@@ -253,12 +261,16 @@ static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
// We can only sink load instructions if there is nothing between the load and
// the end of block that could change the value.
if (I->mayReadFromMemory()) {
- // We know that SrcBlock is the unique predecessor of DestBlock.
+ // We already know that SrcBlock is the unique predecessor of DestBlock.
for (BasicBlock::iterator Scan = std::next(I->getIterator()),
E = I->getParent()->end();
- Scan != E; ++Scan)
+ Scan != E; ++Scan) {
+ // Note alias analysis can tell whether two pointers can point to the
+ // same object in memory or not, and thereby find further opportunities to
+ // sink.
if (Scan->mayWriteToMemory())
return false;
+ }
}
BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
@@ -273,12 +285,10 @@ static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
// Try to sink instructions after VPtr to the indirect call fallback.
// Returns the number of sunk IR instructions.
-static int tryToSinkInstructions(Instruction *VPtr,
+static int tryToSinkInstructions(BasicBlock *OriginalBB,
BasicBlock *IndirectCallBB) {
- BasicBlock *OriginalBB = VPtr->getParent();
-
int SinkCount = 0;
- // FIXME: Find a way to bail out of the loop.
+ // Sink all eligible instructions in OriginalBB in reverse order.
for (Instruction &I :
llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(*OriginalBB))))
if (tryToSinkInstruction(&I, IndirectCallBB))
@@ -314,16 +324,19 @@ private:
Function *const TargetFunction;
const uint64_t Count;
- // The byte offset of TargetFunction starting from the vtable address point.
- uint64_t FunctionOffset;
- SmallVector<std::pair<uint64_t, uint64_t>, 2> VTableGUIDAndCounts;
- SmallVector<Constant *, 2> AddressPoints;
+ // The following fields only exist for promotion candidates with vtable
+ // information.
+ //
+ // Due to class inheritance, one virtual call candidate can come from
+ // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and
+ // counts for 'TargetFunction'. `AddressPoints` stores the vtable address
+ // points for comparison.
+ VTableGUIDCountsMap VTableGUIDAndCounts;
+ SmallVector<Constant *> AddressPoints;
PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
};
- using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 4>;
-
// Check if the indirect-call call site should be promoted. Return the number
// of promotions. Inst is the candidate indirect call, ValueDataRef
// contains the array of value profile data for profiled targets,
@@ -356,9 +369,13 @@ private:
bool isProfitableToCompareVTables(
const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount);
- // Populate `VTableGUIDCounts` vtable GUIDs and their counts and each
- // candidate with vtable information. Returns the vtable instruction if not
- // null.
+ // Given an indirect callsite and the list of function candidates, compute
+ // the following vtable information in output parameters and return the vtable
+ // pointer if type profiles exist.
+ // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof metadata
+ // attached on the vtable pointer.
+ // - For each function candidate, find out the vtables from which it gets
+ // called and store the <vtable-guid, count> there.
Instruction *computeVTableInfos(const CallBase *CB,
VTableGUIDCountsMap &VTableGUIDCounts,
std::vector<PromotionCandidate> &Candidates);
@@ -490,9 +507,32 @@ Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
Instruction *IndirectCallPromoter::computeVTableInfos(
const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap,
std::vector<PromotionCandidate> &Candidates) {
- if (!ICPEnableVTableCmp)
+ if (!EnableVTableProfileUse)
return nullptr;
+ // Take the following code sequence as an example, here is how the code works
+ // @vtable1 = {[n x ptr] [... ptr @func1]}
+ // @vtable2 = {[m x ptr] [... ptr @func2]}
+ //
+ // %vptr = load ptr, ptr %d, !prof !0
+ // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1")
+ // tail call void @llvm.assume(i1 %0)
+ // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
+ // %1 = load ptr, ptr %vfn
+ // call void %1(ptr %d), !prof !1
+ //
+ // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
+ // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
+ //
+ // Step 1. Find out the %vptr instruction for indirect call and use its !prof
+ // to populate `GUIDCountsMap`.
+ // Step 2. For each vtable-guid, look up its definition from symtab. LTO can
+ // make vtable definitions visible across modules.
+ // Step 3. Compute the byte offset of the virtual call, by adding vtable
+ // address point offset and function's offset relative to vtable address
+ // point. For each function candidate, this step tells us the vtable from
+ // which it comes, and the vtable address point to compare %vptr with.
+
// Only virtual calls have virtual call site info.
auto Iter = VirtualCSInfo.find(CB);
if (Iter == VirtualCSInfo.end())
@@ -525,7 +565,7 @@ Instruction *IndirectCallPromoter::computeVTableInfos(
}
std::optional<uint64_t> MaybeAddressPointOffset =
- getCompatibleTypeOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr);
+ getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr);
if (!MaybeAddressPointOffset)
continue;
@@ -541,8 +581,9 @@ Instruction *IndirectCallPromoter::computeVTableInfos(
continue;
auto &Candidate = Candidates[CalleeIndexIter->second];
- Candidate.VTableGUIDAndCounts.push_back(
- {VTableVal, VTableValueDataArray[j].Count});
+ // There shouldn't be duplicate GUIDs in one !prof metadata, so assigning
+ // counters directly won't cause overwrite or counter loss.
+ Candidate.VTableGUIDAndCounts[VTableVal] = VTableValueDataArray[j].Count;
Candidate.AddressPoints.push_back(
getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset));
}
@@ -550,23 +591,23 @@ Instruction *IndirectCallPromoter::computeVTableInfos(
return VPtr;
}
-static MDNode *getBranchWeights(LLVMContext &Context, uint64_t IfCount,
- uint64_t ElseCount) {
+// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight.
+// Scales uint64_t counters down to uint32_t if necessary to prevent overflow.
+static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight,
+ uint64_t FalseWeight) {
MDBuilder MDB(Context);
- uint64_t Scale = calculateCountScale(std::max(IfCount, ElseCount));
- return MDB.createBranchWeights(scaleBranchCount(IfCount, Scale),
- scaleBranchCount(ElseCount, Scale));
+ uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight));
+ return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale),
+ scaleBranchCount(FalseWeight, Scale));
}
CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
uint64_t Count, uint64_t TotalCount,
bool AttachProfToDirectCall,
OptimizationRemarkEmitter *ORE) {
- MDNode *BranchWeights =
- getBranchWeights(CB.getContext(), Count, TotalCount - Count);
-
- CallBase &NewInst =
- promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
+ CallBase &NewInst = promoteCallWithIfThenElse(
+ CB, DirectCallee,
+ createBranchWeights(CB.getContext(), Count, TotalCount - Count));
if (AttachProfToDirectCall)
setBranchWeights(NewInst, {static_cast<uint32_t>(Count)});
@@ -600,10 +641,13 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
NumOfPGOICallPromotion++;
NumPromoted++;
- if (!ICPEnableVTableCmp || C.VTableGUIDAndCounts.empty())
+ if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
continue;
- // Update VTableGUIDCounts
+ // After a virtual call candidate gets promoted, update the vtable's counts
+ // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
+ // a vtable from which the virtual call is loaded. Compute the sum and use
+ // 128-bit APInt to improve accuracy.
uint64_t SumVTableCount = 0;
for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
SumVTableCount += VTableCount;
@@ -671,22 +715,20 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
MutableArrayRef<InstrProfValueData> ICallProfDataRef,
VTableGUIDCountsMap &VTableGUIDCounts) {
SmallVector<uint64_t, 4> PromotedFuncCount;
+
for (const auto &Candidate : Candidates) {
- uint64_t IfCount = 0;
- for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) {
- IfCount += Count;
+ for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
VTableGUIDCounts[GUID] -= Count;
- }
- // Use indirect call counters to compute branch weights.
+ // 'OriginalBB' is the basic block of indirect call before indirect call
+ // promotion.
BasicBlock *OriginalBB = CB.getParent();
promoteCallWithVTableCmp(
CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
- getBranchWeights(CB.getContext(), IfCount, TotalFuncCount - IfCount));
+ createBranchWeights(CB.getContext(), Candidate.Count,
+ TotalFuncCount - Candidate.Count));
- int SinkCount = tryToSinkInstructions(
- PromotedFuncCount.empty() ? VPtr : OriginalBB->getFirstNonPHI(),
- CB.getParent());
+ int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
@@ -700,9 +742,9 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
<< " instructions";
});
- PromotedFuncCount.push_back(IfCount);
+ PromotedFuncCount.push_back(Candidate.Count);
- TotalFuncCount -= IfCount;
+ TotalFuncCount -= Candidate.Count;
NumOfPGOICallPromotion++;
}
@@ -711,8 +753,10 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
// Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
// a distinct 'VPtr'.
- // TODO: Handle profile update properly when Clang `-fstrict-vtable-pointers`
- // is enabled and a vtable is used to load multiple virtual functions.
+ // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
+ // used to load multiple virtual functions. The vtable profiles need to be
+ // updated properly in that case (e.g., annotate type profiles per indirect
+ // call).
for (size_t I = 0; I < PromotedFuncCount.size(); I++)
ICallProfDataRef[I].Count -=
std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count);
@@ -770,7 +814,7 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
// cannot sink to indirect fallback.
bool IndirectCallPromoter::isProfitableToCompareVTables(
const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount) {
- if (!ICPEnableVTableCmp || Candidates.empty())
+ if (!EnableVTableProfileUse || Candidates.empty())
return false;
uint64_t RemainingVTableCount = TotalCount;
for (size_t I = 0; I < Candidates.size(); I++) {
@@ -779,17 +823,16 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
VTableSumCount += Count;
- if (VTableSumCount < Candidate.Count * ICPVTableCountPercentage)
+ if (VTableSumCount < Candidate.Count * ICPVTablePercentageThreshold)
return false;
RemainingVTableCount -= Candidate.Count;
- int NumAdditionalVTable = 0;
+ int MaxNumVTable = 1;
if (I == Candidates.size() - 1)
- NumAdditionalVTable = ICPNumAdditionalVTableLast;
+ MaxNumVTable = ICPMaxNumVTableLastCandidate;
- int ActualNumAdditionalInst = Candidate.AddressPoints.size() - 1;
- if (ActualNumAdditionalInst > NumAdditionalVTable) {
+ if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
return false;
}
}
@@ -810,45 +853,6 @@ computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM,
return FAM.getResult<DominatorTreeAnalysis>(F);
};
- auto compute = [&](Function *Func) {
- if (!Func || Func->use_empty())
- return;
- // Iterate all type.test calls and find all indirect calls.
- // TODO: Add llvm.public.type.test
- for (Use &U : llvm::make_early_inc_range(Func->uses())) {
- auto *CI = dyn_cast<CallInst>(U.getUser());
- if (!CI)
- continue;
- auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
- if (!TypeMDVal)
- continue;
- auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
- if (!CompatibleTypeId)
- continue;
-
- // Find out all devirtualizable call sites given a llvm.type.test
- // intrinsic call.
- SmallVector<DevirtCallSite, 1> DevirtCalls;
- SmallVector<CallInst *, 1> Assumes;
- auto &DT = LookupDomTree(*CI->getFunction());
- findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
-
- // type-id, offset from the address point
- // combined with type metadata to compute function offset
- for (auto &DevirtCall : DevirtCalls) {
- CallBase &CB = DevirtCall.CB;
- // Given an indirect call, try find the instruction which loads a
- // pointer to virtual table.
- Instruction *VTablePtr =
- PGOIndirectCallVisitor::tryGetVTableInstruction(&CB);
- if (!VTablePtr)
- continue;
- VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
- CompatibleTypeId->getString()};
- }
- }
- };
-
// Right now only llvm.type.test is used to find out virtual call sites.
// With ThinLTO and whole-program-devirtualization, llvm.type.test and
// llvm.public.type.test are emitted, and llvm.public.type.test is either
@@ -859,12 +863,39 @@ computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM,
// that case.
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
+ if (!TypeTestFunc || TypeTestFunc->use_empty())
+ return;
+ // Iterate all type.test calls and find all indirect calls.
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+ auto *CI = dyn_cast<CallInst>(U.getUser());
+ if (!CI)
+ continue;
+ auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!TypeMDVal)
+ continue;
+ auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
+ if (!CompatibleTypeId)
+ continue;
- compute(TypeTestFunc);
-
- Function *PublicTypeTestFunc =
- M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
- compute(PublicTypeTestFunc);
+ // Find out all devirtualizable call sites given a llvm.type.test
+ // intrinsic call.
+ SmallVector<DevirtCallSite, 1> DevirtCalls;
+ SmallVector<CallInst *, 1> Assumes;
+ auto &DT = LookupDomTree(*CI->getFunction());
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
+
+ for (auto &DevirtCall : DevirtCalls) {
+ CallBase &CB = DevirtCall.CB;
+ // Given an indirect call, try to find the instruction which loads a
+ // pointer to virtual table.
+ Instruction *VTablePtr =
+ PGOIndirectCallVisitor::tryGetVTableInstruction(&CB);
+ if (!VTablePtr)
+ continue;
+ VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
+ CompatibleTypeId->getString()};
+ }
+ }
}
// A wrapper function that does the actual work.
@@ -883,11 +914,13 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
- // This map records states across functions in an LLVM IR module.
- // IndirectCallPromoter processes one
- // function at a time and updates this map with new entries the first time
- // the entry is needed in the module; the subsequent functions could re-use
- // map entries inserted when processing prior functions.
+ // VTableAddressPointOffsetVal stores the vtable address points. The vtable
+ // address point of a given <vtable, address point offset> is static (doesn't
+ // change after being computed once).
+ // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map
+ // entry the first time a <vtable, offset> pair is seen, as
+ // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter
+ // repeatedly on each function.
VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
for (auto &F : M) {
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 2269c2e0fffa..26444a5f3d13 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -319,6 +319,8 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
cl::desc("Do not instrument functions with the number of critical edges "
" greater than this threshold."));
+extern cl::opt<unsigned> MaxNumVTableAnnotations;
+
namespace llvm {
// Command line option to turn on CFG dot dump after profile annotation.
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
@@ -331,6 +333,7 @@ extern cl::opt<std::string> ViewBlockFreqFuncName;
// Command line option to enable vtable value profiling. Defined in
// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
extern cl::opt<bool> EnableVTableValueProfiling;
+extern cl::opt<bool> EnableVTableProfileUse;
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
} // namespace llvm
@@ -1726,6 +1729,14 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
llvm_unreachable("Unknown visiting mode");
}
+// Returns the maximum number of value-profile annotations for the given value
+// kind: MaxNumMemOPAnnotations for IPVK_MemOPSize, MaxNumVTableAnnotations for
+// IPVK_VTableTarget, and MaxNumAnnotations for every other kind.
+static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) {
+  switch (ValueProfKind) {
+  case IPVK_MemOPSize:
+    return MaxNumMemOPAnnotations;
+  case IPVK_VTableTarget:
+    return MaxNumVTableAnnotations;
+  default:
+    return MaxNumAnnotations;
+  }
+}
+
// Traverse all valuesites and annotate the instructions for all value kind.
void PGOUseFunc::annotateValueSites() {
if (isValueProfilingDisabled())
@@ -1760,10 +1771,10 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {
LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
<< "): Index = " << ValueSiteIndex << " out of "
<< NumValueSites << "\n");
- annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
- static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
- Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
- : MaxNumAnnotations);
+ annotateValueSite(
+ *M, *I.AnnotatedInst, ProfileRecord,
+ static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
+ getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
ValueSiteIndex++;
}
}
@@ -2052,6 +2063,16 @@ static bool annotateAllFunctions(
return false;
}
+ if (EnableVTableProfileUse) {
+ for (GlobalVariable &G : M.globals()) {
+ if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
+ continue;
+
+ // Create the PGO name metadata for this named, type-annotated global.
+ createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
+ }
+ }
+
// Add the profile summary (read from the header of the indexed summary) here
// so that we can use it below when reading counters (which checks if the
// function should be marked with a cold or inlinehint attribute).
@@ -2227,7 +2248,6 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
};
auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
-
if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
return PreservedAnalyses::all();
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
index 96a2b2360787..7b7f6d17d59f 100644
--- a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll
@@ -1,18 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -icp-enable-vtable-cmp -icp-num-additional-vtable-last=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
-; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -icp-enable-vtable-cmp -icp-num-additional-vtable-last=0 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-@Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0
-@Base2 = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo] }, !type !2
-@Base3 = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo] }, !type !6
+@Base1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0
+@Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo] }, !type !2
+@Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo] }, !type !6
-@Derived1 = dso_local constant { [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived1_bar] }, !type !1, !type !2, !type !3
-@Derived2 = dso_local constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo], [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived2_bar] }, !type !4, !type !5, !type !6, !type !7
-@Derived3 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0, !type !8
+@Derived1 = constant { [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived1_bar] }, !type !1, !type !2, !type !3
+@Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo], [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived2_bar] }, !type !4, !type !5, !type !6, !type !7
+@Derived3 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0, !type !8
; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived1_bar with count 600 out of 1600, compare 1 vtables and sink 2 instructions
; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived2_bar with count 500 out of 1000, compare 1 vtables and sink 2 instructions
@@ -28,13 +28,13 @@ define void @test(ptr %d) {
; VTABLE-CMP-NEXT: [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived1, i32 40)
; VTABLE-CMP-NEXT: br i1 [[TMP1]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]]
; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG]]:
-; VTABLE-CMP-NEXT: tail call void @Derived1_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: call void @Derived1_bar(ptr [[D]])
; VTABLE-CMP-NEXT: br label %[[IF_END_ICP:.*]]
; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT]]:
; VTABLE-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived2, i32 64)
; VTABLE-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]]
; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG1]]:
-; VTABLE-CMP-NEXT: tail call void @Derived2_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: call void @Derived2_bar(ptr [[D]])
; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3:.*]]
; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT2]]:
; VTABLE-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Base1, i32 16)
@@ -42,12 +42,12 @@ define void @test(ptr %d) {
; VTABLE-CMP-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; VTABLE-CMP-NEXT: br i1 [[TMP5]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]]
; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG4]]:
-; VTABLE-CMP-NEXT: tail call void @Base1_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: call void @Base1_bar(ptr [[D]])
; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6:.*]]
; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT5]]:
; VTABLE-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
; VTABLE-CMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8
-; VTABLE-CMP-NEXT: tail call void [[TMP6]](ptr [[D]])
+; VTABLE-CMP-NEXT: call void [[TMP6]](ptr [[D]])
; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6]]
; VTABLE-CMP: [[IF_END_ICP6]]:
; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3]]
@@ -67,22 +67,22 @@ define void @test(ptr %d) {
; FUNC-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @Derived1_bar
; FUNC-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]]
; FUNC-CMP: [[IF_TRUE_DIRECT_TARG]]:
-; FUNC-CMP-NEXT: tail call void @Derived1_bar(ptr [[D]])
+; FUNC-CMP-NEXT: call void @Derived1_bar(ptr [[D]])
; FUNC-CMP-NEXT: br label %[[IF_END_ICP:.*]]
; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT]]:
; FUNC-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[TMP1]], @Derived2_bar
; FUNC-CMP-NEXT: br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]]
; FUNC-CMP: [[IF_TRUE_DIRECT_TARG1]]:
-; FUNC-CMP-NEXT: tail call void @Derived2_bar(ptr [[D]])
+; FUNC-CMP-NEXT: call void @Derived2_bar(ptr [[D]])
; FUNC-CMP-NEXT: br label %[[IF_END_ICP3:.*]]
; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT2]]:
; FUNC-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @Base1_bar
; FUNC-CMP-NEXT: br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]]
; FUNC-CMP: [[IF_TRUE_DIRECT_TARG4]]:
-; FUNC-CMP-NEXT: tail call void @Base1_bar(ptr [[D]])
+; FUNC-CMP-NEXT: call void @Base1_bar(ptr [[D]])
; FUNC-CMP-NEXT: br label %[[IF_END_ICP6:.*]]
; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT5]]:
-; FUNC-CMP-NEXT: tail call void [[TMP1]](ptr [[D]])
+; FUNC-CMP-NEXT: call void [[TMP1]](ptr [[D]])
; FUNC-CMP-NEXT: br label %[[IF_END_ICP6]]
; FUNC-CMP: [[IF_END_ICP6]]:
; FUNC-CMP-NEXT: br label %[[IF_END_ICP3]]
@@ -97,7 +97,7 @@ entry:
tail call void @llvm.assume(i1 %0)
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%1 = load ptr, ptr %vfn
- tail call void %1(ptr %d), !prof !10
+ call void %1(ptr %d), !prof !10
ret void
}
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
index e82aa9f14788..6d3a6972f688 100644
--- a/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll
@@ -1,10 +1,10 @@
-; RUN: opt < %s -passes='pgo-icall-prom' -icp-enable-vtable-cmp -S | FileCheck %s --check-prefix=VTABLE
+; RUN: opt < %s -passes='pgo-icall-prom' -enable-vtable-profile-use -S | FileCheck %s --check-prefix=VTABLE
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-@_ZTV4Base = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0, !type !1
-@_ZTV7Derived = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1, !type !2, !type !3
+@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0, !type !1
+@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1, !type !2, !type !3
@.str = private constant [15 x i8] c"out of tickets\00"
@@ -109,12 +109,10 @@ lpad:
resume { ptr, i32 } %0
}
-declare i1 @llvm.type.test(ptr, metadata) #2
+declare i1 @llvm.type.test(ptr, metadata)
declare void @llvm.assume(i1)
declare i32 @__gxx_personality_v0(...)
-attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-
!0 = !{i64 16, !"_ZTS4Base"}
!1 = !{i64 16, !"_ZTSM4BaseFivE.virtual"}
!2 = !{i64 16, !"_ZTS7Derived"}
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
index 1dc208c30952..d9126aec3d94 100644
--- a/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -icp-enable-vtable-cmp -S 2>&1 | FileCheck %s --check-prefixes=VTABLE,REMARK
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -S 2>&1 | FileCheck %s --check-prefixes=VTABLE,REMARK
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -6,8 +6,8 @@ target triple = "x86_64-unknown-linux-gnu"
; REMARK: remark: <unknown>:0:0: Promote indirect call to _ZN7Derived5func1Eii with count 900 out of 1600, compare 1 vtables and sink 1 instruction
; REMARK: remark: <unknown>:0:0: Promote indirect call to _ZN4Base5func1Eii with count 700 out of 700, compare 1 vtables and sink 1 instructions
-@_ZTV7Derived = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived5func1Eii] }, align 8, !type !0, !type !1, !type !2, !type !3
-@_ZTV4Base = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Eii] }, align 8, !type !0, !type !1
+@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived5func1Eii] }, !type !0, !type !1, !type !2, !type !3
+@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Eii] }, !type !0, !type !1
define i32 @test_tail_call(ptr %ptr, i32 %a, i32 %b) {
; VTABLE-LABEL: define i32 @test_tail_call(
@@ -43,6 +43,7 @@ entry:
declare i1 @llvm.type.test(ptr, metadata)
declare void @llvm.assume(i1)
+
define i32 @_ZN7Derived5func1Eii(ptr %this, i32 %a, i32 %b) {
entry:
%sub = sub nsw i32 %a, %b