diff options
author | mingmingl <mingmingl@google.com> | 2024-06-06 23:12:04 -0700 |
---|---|---|
committer | mingmingl <mingmingl@google.com> | 2024-06-06 23:12:12 -0700 |
commit | 4f6b7ab1e5640a36316cffc3f879b6c6f4408d54 (patch) | |
tree | b7fbec420bb1532abcabe4f7eaeb5965af635999 | |
parent | 142845ce3830a43ad80a5ad81d4c6518c2eebd8e (diff) |
1. Resolve review comments.
2. Handle vtable's PGO name, like what we do for indirect-call
promotion.
- InstrProf.h/cpp and PGOInstrumentation.cpp are modified.
3. Make use of 'MaxNumVTableAnnotations' in PGOInstrumentation.cpp
-rw-r--r-- | compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp | 34 | ||||
-rw-r--r-- | llvm/include/llvm/ProfileData/InstrProf.h | 10 | ||||
-rw-r--r-- | llvm/lib/ProfileData/InstrProf.cpp | 40 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp | 301 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 30 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll | 34 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll | 10 | ||||
-rw-r--r-- | llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll | 7 |
8 files changed, 279 insertions, 187 deletions
diff --git a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp index 19def403cca7..0a32034f182a 100644 --- a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp +++ b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp @@ -110,26 +110,40 @@ // ICTEXT: _ZTV8Derived1:250 // Test indirect call promotion transformation using vtable profiles. -// Build with `-g` to enable debug information. -// RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -g -flto=thin -fwhole-program-vtables -O2 -mllvm -enable-vtable-value-profiling -mllvm -icp-enable-vtable-cmp -Rpass=pgo-icall-prom -mllvm -print-after=pgo-icall-prom -mllvm -filter-print-funcs=main %s 2>&1 | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP" +// - Build with `-g` to enable debug information. +// - In real world settings, ICP pass is disabled in prelink pipeline. In +// the postlink pipeline, ICP is enabled after whole-program-devirtualization +// pass. Do the same thing in this test. +// - Enable `-fwhole-program-vtables` to generate type metadata and intrinsics. +// - Enable `-fno-split-lto-unit` and `-Wl,-lto-whole-program-visibility` to +// preserve type intrinsics for ICP pass.
+// RUN: %clangxx -m64 -fprofile-use=test.profdata -Wl,--lto-whole-program-visibility \ +// RUN: -mllvm -disable-icp=true -Wl,-mllvm,-disable-icp=false -fuse-ld=lld \ +// RUN: -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \ +// RUN: -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \ +// RUN: -mllvm -enable-vtable-profile-use \ +// RUN: -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \ +// RUN: -Wl,-mllvm,-print-after=pgo-icall-prom \ +// RUN: -Wl,-mllvm,-filter-print-funcs=main %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP" // For the indirect call site `ptr->func` -// REMARK: instrprof-vtable-value-prof.cpp:191:19: remark: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, compare 1 vtables and sink 1 instructions -// REMARK: instrprof-vtable-value-prof.cpp:191:19: remark: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, compare 1 vtables and sink 1 instructions +// REMARK: instrprof-vtable-value-prof.cpp:205:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, compare 1 vtables and sink 1 instructions +// REMARK: instrprof-vtable-value-prof.cpp:205:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, compare 1 vtables and sink 1 instructions // // For the indirect call site `delete ptr` -// REMARK: instrprof-vtable-value-prof.cpp:193:5: remark: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, compare 1 vtables and sink 2 instructions -// REMARK: instrprof-vtable-value-prof.cpp:193:5: remark: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, compare 1 vtables and sink 2 instructions +// REMARK: instrprof-vtable-value-prof.cpp:207:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, compare 1 vtables and sink 2 instructions +// REMARK: 
instrprof-vtable-value-prof.cpp:207:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, compare 1 vtables and sink 2 instructions // The IR matchers for indirect callsite `ptr->func`. // IR-LABEL: @main -// IR: [[OBJ:%.*]] = call {{.*}} @_Z10createTypei +// IR: [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei // IR: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]] // IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E, i32 16) // IR: br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]], // // IR: [[BB1]]: -// IR: [[RESBB1:%.*]] = call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii +// IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii // IR: br label %[[MERGE0:[a-zA-Z0-9_.]+]] // // IR: [[BB2]]: @@ -137,12 +151,12 @@ // IR: br i1 [[CMP2]], label %[[BB3:.*]], label %[[BB4:[a-zA-Z0-9_.]+]], // // IR: [[BB3]]: -// IR: [[RESBB3:%.*]] = call {{.*}} @_ZN8Derived14funcEii +// IR: [[RESBB3:%.*]] = {{.*}}call {{.*}} @_ZN8Derived14funcEii // IR: br label %[[MERGE1:[a-zA-Z0-9_.]+]], // // IR: [[BB4]]: // IR: [[FUNCPTR:%.*]] = load ptr, ptr [[VTABLE]] -// IR: [[RESBB4:%.*]] = call {{.*}} [[FUNCPTR]] +// IR: [[RESBB4:%.*]] = {{.*}}call {{.*}} [[FUNCPTR]] // IR: br label %[[MERGE1]] // // IR: [[MERGE1]]: diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 88c7fe425b5a..817005bd28d8 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -304,8 +304,12 @@ getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue = false); +// TODO: Unify metadata name 'PGOFuncName' and 'PGOName', by supporting read +// of this metadata for backward compatibility and generating 'PGOName' only. 
inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } +inline StringRef getPGONameMetadataName() { return "PGOName"; } + /// Return the PGOFuncName meta data associated with a function. MDNode *getPGOFuncNameMetadata(const Function &F); @@ -314,8 +318,14 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO = false); /// Create the PGOFuncName meta data if PGOFuncName is different from /// function's raw name. This should only apply to internal linkage functions /// declared by users only. +/// TODO: Update all callers to 'createPGONameMetadata' and deprecate this +/// function. void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); +/// Create the PGOName metadata if a global object's PGO name is different from +/// its mangled name. This should apply to local-linkage global objects only. +void createPGONameMetadata(GlobalObject &GO, StringRef PGOName); + /// Check if we can use Comdat for profile variables. This will eliminate /// the duplicated profile variables for Comdat functions. bool needsComdatForCounter(const GlobalObject &GV, const Module &M); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 806d01de1ada..4649db2d92ec 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -228,6 +228,12 @@ cl::opt<bool> EnableVTableValueProfiling( "the types of a C++ pointer. The information is used in indirect " "call promotion to do selective vtable-based comparison.")); +cl::opt<bool> EnableVTableProfileUse( + "enable-vtable-profile-use", cl::init(false), + cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " + "profiles will be used by ICP pass for more efficient indirect " + "call sequence. 
If false, type profiles won't be used.")); + std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo) { @@ -391,7 +397,7 @@ std::string getPGOName(const GlobalVariable &V, bool InLTO) { // PGONameMetadata should be set by compiler at profile use time // and read by symtab creation to look up symbols corresponding to // a MD5 hash. - return getIRPGOObjectName(V, InLTO, /*PGONameMetadata=*/nullptr); + return getIRPGOObjectName(V, InLTO, V.getMetadata(getPGONameMetadataName())); } // See getIRPGOObjectName() for a description of the format. @@ -480,8 +486,7 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { for (GlobalVariable &G : M.globals()) { if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type)) continue; - if (Error E = addVTableWithName( - G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr))) + if (Error E = addVTableWithName(G, getPGOName(G, InLTO))) return E; } @@ -1393,16 +1398,27 @@ MDNode *getPGOFuncNameMetadata(const Function &F) { return F.getMetadata(getPGOFuncNameMetadataName()); } -void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) { - // Only for internal linkage functions. - if (PGOFuncName == F.getName()) - return; - // Don't create duplicated meta-data. - if (getPGOFuncNameMetadata(F)) +static void createPGONameMetadata(GlobalObject &GO, StringRef MetadataName, + StringRef PGOName) { + // For internal linkage objects, its name is not the same as its PGO name. + if (GO.getName() == PGOName) return; - LLVMContext &C = F.getContext(); - MDNode *N = MDNode::get(C, MDString::get(C, PGOFuncName)); - F.setMetadata(getPGOFuncNameMetadataName(), N); + + // Don't create duplicated metadata.
+ if (GO.getMetadata(MetadataName)) + return; + + LLVMContext &C = GO.getContext(); + MDNode *N = MDNode::get(C, MDString::get(C, PGOName)); + GO.setMetadata(MetadataName, N); +} + +void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) { + return createPGONameMetadata(F, getPGOFuncNameMetadataName(), PGOFuncName); +} + +void createPGONameMetadata(GlobalObject &GO, StringRef PGOName) { + return createPGONameMetadata(GO, getPGONameMetadataName(), PGOName); } bool needsComdatForCounter(const GlobalObject &GO, const Module &M) { diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 5384fa4e3794..c805cf3f22f3 100644 --- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -58,6 +58,10 @@ STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); extern cl::opt<unsigned> MaxNumVTableAnnotations; +namespace llvm { +extern cl::opt<bool> EnableVTableProfileUse; +} + // Command line option to disable indirect-call promotion with the default as // false. This is for debug purpose. static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden, @@ -110,29 +114,31 @@ static cl::opt<bool> ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens")); -// This option is meant to be used by LLVM regression test and test the -// transformation that compares vtables. -static cl::opt<bool> ICPEnableVTableCmp( - "icp-enable-vtable-cmp", cl::init(false), cl::Hidden, - cl::desc("If ThinLTO and WPD is enabled and this option is true, " - "indirect-call promotion pass will compare vtables rather than " - "functions for speculative devirtualization of virtual calls." 
- " If set to false, indirect-call promotion pass will always " - "compare functions.")); - -static cl::opt<float> - ICPVTableCountPercentage("icp-vtable-count-percentage", cl::init(0.99), - cl::Hidden, - cl::desc("Percentage of vtable count to compare")); - -static cl::opt<int> ICPNumAdditionalVTableLast( - "icp-num-additional-vtable-last", cl::init(0), cl::Hidden, - cl::desc("The number of additional instruction for the last candidate")); +// Indirect call promotion pass will fall back to function-based comparison if +// vtable-count / function-count is smaller than this threshold. +static cl::opt<float> ICPVTablePercentageThreshold( + "icp-vtable-percentage-threshold", cl::init(0.99), cl::Hidden, + cl::desc("The percentage threshold of vtable-count / function-count for " + "cost-benefit analysis. ")); + +// Although comparing vtables can save a vtable load, we may need to compare +// vtable pointer with multiple vtable address points due to class inheritance. +// Comparing with multiple vtables inserts additional instructions on hot code +// path; and doing so for earlier candidate of one icall can affect later +// function candidate in an undesired way. We allow multiple vtable comparison +// for the last function candidate and use the option below to cap the number +// of vtables. +static cl::opt<int> ICPMaxNumVTableLastCandidate( + "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden, + cl::desc("The maximum number of vtable for the last candidate.")); namespace { +// The key is a vtable global variable, and the value is a map. +// In the inner map, the key represents address point offsets and the value is a +// constant for this address point. using VTableAddressPointOffsetValMap = - SmallDenseMap<const GlobalVariable *, SmallDenseMap<int, Constant *, 4>, 8>; + SmallDenseMap<const GlobalVariable *, SmallDenseMap<int, Constant *>>; // A struct to collect type information for a virtual call site. 
struct VirtualCallSiteInfo { @@ -146,19 +152,25 @@ struct VirtualCallSiteInfo { // The key is a virtual call, and value is its type information. using VirtualCallSiteTypeInfoMap = - SmallDenseMap<const CallBase *, VirtualCallSiteInfo, 8>; + SmallDenseMap<const CallBase *, VirtualCallSiteInfo>; + +// The key is vtable GUID, and value is its value profile count. +using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t>; -// Find the offset where type string is `CompatibleType`. +// Returns the address point offset of the given compatible type. +// +// Type metadata of a vtable specifies the types that can contain a pointer to +// this vtable, for example, `Base*` can be a pointer to an instantiated type +// but not vice versa. See also https://llvm.org/docs/TypeMetadata.html static std::optional<uint64_t> -getCompatibleTypeOffset(const GlobalVariable &VTableVar, - StringRef CompatibleType) { - SmallVector<MDNode *, 2> Types; // type metadata associated with a vtable. +getAddressPointOffset(const GlobalVariable &VTableVar, + StringRef CompatibleType) { + SmallVector<MDNode *> Types; VTableVar.getMetadata(LLVMContext::MD_type, Types); for (MDNode *Type : Types) if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()); TypeId && TypeId->getString() == CompatibleType) - return cast<ConstantInt>( cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) ->getZExtValue(); @@ -181,7 +193,7 @@ static Constant *getVTableAddressPointOffset(GlobalVariable *VTable, llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset)); } -// Returns the basic block in which `Inst` by `Use`. +// Returns the basic block in which `Inst` is used via its `UserInst`.
static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) { if (PHINode *PN = dyn_cast<PHINode>(UserInst)) return PN->getIncomingBlock(U); @@ -199,7 +211,7 @@ static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) { BasicBlock *BB = Inst->getParent(); assert(Inst->getParent() != DestBB && BB->getTerminator()->getNumSuccessors() == 2 && - "Caller should guarantee"); + "Guaranteed by ICP transformation"); // Do not sink across a critical edge for simplicity. if (DestBB->getUniquePredecessor() != BB) return false; @@ -225,18 +237,14 @@ static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) { // For the virtual call dispatch sequence, try to sink vtable load instructions // to the cold indirect call fallback. +// FIXME: Move the sink eligibility check below to a utility function in +// Transforms/Utils/ directory. static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { - assert(!I->isTerminator()); if (!isDestBBSuitableForSink(I, DestBlock)) return false; - assert(DestBlock->getUniquePredecessor() == I->getParent()); - - // Do not move control-flow-involving, volatile loads, vaarg, etc. - // Do not sink static or dynamic alloca instructions. Static allocas must - // remain in the entry block, and dynamic allocas must not be sunk in between - // a stacksave / stackrestore pair, which would incorrectly shorten its - // lifetime. + // Do not move control-flow-involving, volatile loads, vaarg, alloca + // instructions, etc. if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || isa<AllocaInst>(I)) return false; @@ -253,12 +261,16 @@ static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { // We can only sink load instructions if there is nothing between the load and // the end of block that could change the value. if (I->mayReadFromMemory()) { - // We know that SrcBlock is the unique predecessor of DestBlock. 
+ // We already know that SrcBlock is the unique predecessor of DestBlock. for (BasicBlock::iterator Scan = std::next(I->getIterator()), E = I->getParent()->end(); - Scan != E; ++Scan) + Scan != E; ++Scan) { + // Note alias analysis can tell whether two pointers can point to the + // same object in memory or not, and thereby find further opportunities to + // sink. if (Scan->mayWriteToMemory()) return false; + } } BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); @@ -273,12 +285,10 @@ static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { // Try to sink instructions after VPtr to the indirect call fallback. // Returns the number of sunk IR instructions. -static int tryToSinkInstructions(Instruction *VPtr, +static int tryToSinkInstructions(BasicBlock *OriginalBB, BasicBlock *IndirectCallBB) { - BasicBlock *OriginalBB = VPtr->getParent(); - int SinkCount = 0; - // FIXME: Find a way to bail out of the loop. + // Sink all eligible instructions in OriginalBB in reverse order. for (Instruction &I : llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(*OriginalBB)))) if (tryToSinkInstruction(&I, IndirectCallBB)) @@ -314,16 +324,19 @@ private: Function *const TargetFunction; const uint64_t Count; - // The byte offset of TargetFunction starting from the vtable address point. - uint64_t FunctionOffset; - SmallVector<std::pair<uint64_t, uint64_t>, 2> VTableGUIDAndCounts; - SmallVector<Constant *, 2> AddressPoints; + // The following fields only exist for promotion candidates with vtable + // information. + // + // Due to class inheritance, one virtual call candidate can come from + // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and + // counts for 'TargetFunction'. `AddressPoints` stores the vtable address + // points for comparison.
+ VTableGUIDCountsMap VTableGUIDAndCounts; + SmallVector<Constant *> AddressPoints; PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} }; - using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 4>; - // Check if the indirect-call call site should be promoted. Return the number // of promotions. Inst is the candidate indirect call, ValueDataRef // contains the array of value profile data for profiled targets, @@ -356,9 +369,13 @@ private: bool isProfitableToCompareVTables( const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount); - // Populate `VTableGUIDCounts` vtable GUIDs and their counts and each - // candidate with vtable information. Returns the vtable instruction if not - // null. + // Given an indirect callsite and the list of function candidates, compute + // the following vtable information in output parameters and returns vtable + // pointer if type profiles exist. + // - Populate `VTableGUIDCounts` with <vtable-guid, count> with !prof metadata + // attached on the vtable pointer. + // - For each function candidate, finds out the vtables from which it get + // called and stores the <vtable-guid, count> there. Instruction *computeVTableInfos(const CallBase *CB, VTableGUIDCountsMap &VTableGUIDCounts, std::vector<PromotionCandidate> &Candidates); @@ -490,9 +507,32 @@ Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar( Instruction *IndirectCallPromoter::computeVTableInfos( const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap, std::vector<PromotionCandidate> &Candidates) { - if (!ICPEnableVTableCmp) + if (!EnableVTableProfileUse) return nullptr; + // Take the following code sequence as an example, here is how the code works + // @vtable1 = {[n x ptr] [... ptr @func1]} + // @vtable2 = {[m x ptr] [... 
ptr @func2]} + // + // %vptr = load ptr, ptr %d, !prof !0 + // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1") + // tail call void @llvm.assume(i1 %0) + // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1 + // %1 = load ptr, ptr %vfn + // call void %1(ptr %d), !prof !1 + // + // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50} + // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50} + // + // Step 1. Find out the %vptr instruction for indirect call and use its !prof + // to populate `GUIDCountsMap`. + // Step 2. For each vtable-guid, look up its definition from symtab. LTO can + // make vtable definitions visible across modules. + // Step 3. Compute the byte offset of the virtual call, by adding vtable + // address point offset and function's offset relative to vtable address + // point. For each function candidate, this step tells us the vtable from + // which it comes from, and the vtable address point to compare %vptr with. + // Only virtual calls have virtual call site info. auto Iter = VirtualCSInfo.find(CB); if (Iter == VirtualCSInfo.end()) @@ -525,7 +565,7 @@ Instruction *IndirectCallPromoter::computeVTableInfos( } std::optional<uint64_t> MaybeAddressPointOffset = - getCompatibleTypeOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr); + getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr); if (!MaybeAddressPointOffset) continue; @@ -541,8 +581,9 @@ Instruction *IndirectCallPromoter::computeVTableInfos( continue; auto &Candidate = Candidates[CalleeIndexIter->second]; - Candidate.VTableGUIDAndCounts.push_back( - {VTableVal, VTableValueDataArray[j].Count}); + // There shouldn't be duplicate GUIDs in one !prof metadata, so assign + // counters directly won't cause overwrite or counter loss. 
+ Candidate.VTableGUIDAndCounts[VTableVal] = VTableValueDataArray[j].Count; Candidate.AddressPoints.push_back( getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset)); } @@ -550,23 +591,23 @@ Instruction *IndirectCallPromoter::computeVTableInfos( return VPtr; } -static MDNode *getBranchWeights(LLVMContext &Context, uint64_t IfCount, - uint64_t ElseCount) { +// Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight. +// Scales uint64_t counters down to uint32_t if necessary to prevent overflow. +static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight, + uint64_t FalseWeight) { MDBuilder MDB(Context); - uint64_t Scale = calculateCountScale(std::max(IfCount, ElseCount)); - return MDB.createBranchWeights(scaleBranchCount(IfCount, Scale), - scaleBranchCount(ElseCount, Scale)); + uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight)); + return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale), + scaleBranchCount(FalseWeight, Scale)); } CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE) { - MDNode *BranchWeights = - getBranchWeights(CB.getContext(), Count, TotalCount - Count); - - CallBase &NewInst = - promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights); + CallBase &NewInst = promoteCallWithIfThenElse( + CB, DirectCallee, + createBranchWeights(CB.getContext(), Count, TotalCount - Count)); if (AttachProfToDirectCall) setBranchWeights(NewInst, {static_cast<uint32_t>(Count)}); @@ -600,10 +641,13 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp( NumOfPGOICallPromotion++; NumPromoted++; - if (!ICPEnableVTableCmp || C.VTableGUIDAndCounts.empty()) + if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty()) continue; - // Update VTableGUIDCounts + // After a virtual call candidate gets promoted, update the vtable's counts + // proportionally. 
Each vtable-guid in `C.VTableGUIDAndCounts` represents + // a vtable from which the virtual call is loaded. Compute the sum and use + // 128-bit APInt to improve accuracy. uint64_t SumVTableCount = 0; for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) SumVTableCount += VTableCount; @@ -671,22 +715,20 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp( MutableArrayRef<InstrProfValueData> ICallProfDataRef, VTableGUIDCountsMap &VTableGUIDCounts) { SmallVector<uint64_t, 4> PromotedFuncCount; + for (const auto &Candidate : Candidates) { - uint64_t IfCount = 0; - for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) { - IfCount += Count; + for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) VTableGUIDCounts[GUID] -= Count; - } - // Use indirect call counters to compute branch weights. + // 'OriginalBB' is the basic block of indirect call before indirect call + // promotion. BasicBlock *OriginalBB = CB.getParent(); promoteCallWithVTableCmp( CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints, - getBranchWeights(CB.getContext(), IfCount, TotalFuncCount - IfCount)); + createBranchWeights(CB.getContext(), Candidate.Count, + TotalFuncCount - Candidate.Count)); - int SinkCount = tryToSinkInstructions( - PromotedFuncCount.empty() ? VPtr : OriginalBB->getFirstNonPHI(), - CB.getParent()); + int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent()); ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB) @@ -700,9 +742,9 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp( << " instructions"; }); - PromotedFuncCount.push_back(IfCount); + PromotedFuncCount.push_back(Candidate.Count); - TotalFuncCount -= IfCount; + TotalFuncCount -= Candidate.Count; NumOfPGOICallPromotion++; } @@ -711,8 +753,10 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp( // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a // a distinct 'VPtr'. 
- // TODO: Handle profile update properly when Clang `-fstrict-vtable-pointers` - // is enabled and a vtable is used to load multiple virtual functions. + // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be + // used to load multiple virtual functions. The vtable profiles needs to be + // updated properly in that case (e.g, annotate type profiles per indirect + // call). for (size_t I = 0; I < PromotedFuncCount.size(); I++) ICallProfDataRef[I].Count -= std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count); @@ -770,7 +814,7 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) { // cannot sink to indirect fallback. bool IndirectCallPromoter::isProfitableToCompareVTables( const std::vector<PromotionCandidate> &Candidates, uint64_t TotalCount) { - if (!ICPEnableVTableCmp || Candidates.empty()) + if (!EnableVTableProfileUse || Candidates.empty()) return false; uint64_t RemainingVTableCount = TotalCount; for (size_t I = 0; I < Candidates.size(); I++) { @@ -779,17 +823,16 @@ bool IndirectCallPromoter::isProfitableToCompareVTables( for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts) VTableSumCount += Count; - if (VTableSumCount < Candidate.Count * ICPVTableCountPercentage) + if (VTableSumCount < Candidate.Count * ICPVTablePercentageThreshold) return false; RemainingVTableCount -= Candidate.Count; - int NumAdditionalVTable = 0; + int MaxNumVTable = 1; if (I == Candidates.size() - 1) - NumAdditionalVTable = ICPNumAdditionalVTableLast; + MaxNumVTable = ICPMaxNumVTableLastCandidate; - int ActualNumAdditionalInst = Candidate.AddressPoints.size() - 1; - if (ActualNumAdditionalInst > NumAdditionalVTable) { + if ((int)Candidate.AddressPoints.size() > MaxNumVTable) { return false; } } @@ -810,45 +853,6 @@ computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, return FAM.getResult<DominatorTreeAnalysis>(F); }; - auto compute = [&](Function *Func) { - if (!Func || Func->use_empty()) - return; - // Iterate all 
type.test calls and find all indirect calls. - // TODO: Add llvm.public.type.test - for (Use &U : llvm::make_early_inc_range(Func->uses())) { - auto *CI = dyn_cast<CallInst>(U.getUser()); - if (!CI) - continue; - auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1)); - if (!TypeMDVal) - continue; - auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()); - if (!CompatibleTypeId) - continue; - - // Find out all devirtualizable call sites given a llvm.type.test - // intrinsic call. - SmallVector<DevirtCallSite, 1> DevirtCalls; - SmallVector<CallInst *, 1> Assumes; - auto &DT = LookupDomTree(*CI->getFunction()); - findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); - - // type-id, offset from the address point - // combined with type metadata to compute function offset - for (auto &DevirtCall : DevirtCalls) { - CallBase &CB = DevirtCall.CB; - // Given an indirect call, try find the instruction which loads a - // pointer to virtual table. - Instruction *VTablePtr = - PGOIndirectCallVisitor::tryGetVTableInstruction(&CB); - if (!VTablePtr) - continue; - VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr, - CompatibleTypeId->getString()}; - } - } - }; - // Right now only llvm.type.test is used to find out virtual call sites. // With ThinLTO and whole-program-devirtualization, llvm.type.test and // llvm.public.type.test are emitted, and llvm.public.type.test is either @@ -859,12 +863,39 @@ computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM, // that case. Function *TypeTestFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_test)); + if (!TypeTestFunc || TypeTestFunc->use_empty()) + return; + // Iterate all type.test calls and find all indirect calls. 
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) { + auto *CI = dyn_cast<CallInst>(U.getUser()); + if (!CI) + continue; + auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1)); + if (!TypeMDVal) + continue; + auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()); + if (!CompatibleTypeId) + continue; - compute(TypeTestFunc); - - Function *PublicTypeTestFunc = - M.getFunction(Intrinsic::getName(Intrinsic::public_type_test)); - compute(PublicTypeTestFunc); + // Find out all devirtualizable call sites given a llvm.type.test + // intrinsic call. + SmallVector<DevirtCallSite, 1> DevirtCalls; + SmallVector<CallInst *, 1> Assumes; + auto &DT = LookupDomTree(*CI->getFunction()); + findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT); + + for (auto &DevirtCall : DevirtCalls) { + CallBase &CB = DevirtCall.CB; + // Given an indirect call, try find the instruction which loads a + // pointer to virtual table. + Instruction *VTablePtr = + PGOIndirectCallVisitor::tryGetVTableInstruction(&CB); + if (!VTablePtr) + continue; + VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr, + CompatibleTypeId->getString()}; + } + } } // A wrapper function that does the actual work. @@ -883,11 +914,13 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO, computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo); - // This map records states across functions in an LLVM IR module. - // IndirectCallPromoter processes one - // function at a time and updates this map with new entries the first time - // the entry is needed in the module; the subsequent functions could re-use - // map entries inserted when processing prior functions. + // VTableAddressPointOffsetVal stores the vtable address points. The vtable + // address point of a given <vtable, address point offset> is static (doesn't + // change after being computed once). 
+ // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map + // entry the first time a <vtable, offset> pair is seen, as + // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter + // repeatedly on each function. VTableAddressPointOffsetValMap VTableAddressPointOffsetVal; for (auto &F : M) { diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 2269c2e0fffa..26444a5f3d13 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -319,6 +319,8 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold( cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold.")); +extern cl::opt<unsigned> MaxNumVTableAnnotations; + namespace llvm { // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -331,6 +333,7 @@ extern cl::opt<std::string> ViewBlockFreqFuncName; // Command line option to enable vtable value profiling. Defined in // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= extern cl::opt<bool> EnableVTableValueProfiling; +extern cl::opt<bool> EnableVTableProfileUse; extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate; } // namespace llvm @@ -1726,6 +1729,14 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) { llvm_unreachable("Unknown visiting mode"); } +static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) { + if (ValueProfKind == IPVK_MemOPSize) + return MaxNumMemOPAnnotations; + if (ValueProfKind == llvm::IPVK_VTableTarget) + return MaxNumVTableAnnotations; + return MaxNumAnnotations; +} + // Traverse all valuesites and annotate the instructions for all value kind. 
void PGOUseFunc::annotateValueSites() { if (isValueProfilingDisabled()) @@ -1760,10 +1771,10 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) { LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind << "): Index = " << ValueSiteIndex << " out of " << NumValueSites << "\n"); - annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, - static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, - Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations - : MaxNumAnnotations); + annotateValueSite( + *M, *I.AnnotatedInst, ProfileRecord, + static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, + getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind))); ValueSiteIndex++; } } @@ -2052,6 +2063,16 @@ static bool annotateAllFunctions( return false; } + if (EnableVTableProfileUse) { + for (GlobalVariable &G : M.globals()) { + if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type)) + continue; + + // Create the PGOFuncName meta data. + createPGONameMetadata(G, getPGOName(G, false /* InLTO*/)); + } + } + // Add the profile summary (read from the header of the indexed summary) here // so that we can use it below when reading counters (which checks if the // function should be marked with a cold or inlinehint attribute). 
@@ -2227,7 +2248,6 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M, }; auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M); - if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS, LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) return PreservedAnalyses::all(); diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll index 96a2b2360787..7b7f6d17d59f 100644 --- a/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll +++ b/llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll @@ -1,18 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -icp-enable-vtable-cmp -icp-num-additional-vtable-last=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP -; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -icp-enable-vtable-cmp -icp-num-additional-vtable-last=0 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP +; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP +; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -@Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0 -@Base2 = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo] }, !type !2 -@Base3 = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo] }, !type !6 +@Base1 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr 
@Base1_foo, ptr @Base1_bar] }, !type !0 +@Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo] }, !type !2 +@Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo] }, !type !6 -@Derived1 = dso_local constant { [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived1_bar] }, !type !1, !type !2, !type !3 -@Derived2 = dso_local constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo], [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived2_bar] }, !type !4, !type !5, !type !6, !type !7 -@Derived3 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0, !type !8 +@Derived1 = constant { [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived1_bar] }, !type !1, !type !2, !type !3 +@Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo], [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived2_bar] }, !type !4, !type !5, !type !6, !type !7 +@Derived3 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0, !type !8 ; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived1_bar with count 600 out of 1600, compare 1 vtables and sink 2 instructions ; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived2_bar with count 500 out of 1000, compare 1 vtables and sink 2 instructions @@ -28,13 +28,13 @@ define void @test(ptr %d) { ; VTABLE-CMP-NEXT: [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived1, i32 40) ; VTABLE-CMP-NEXT: br i1 [[TMP1]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof 
[[PROF10:![0-9]+]] ; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG]]: -; VTABLE-CMP-NEXT: tail call void @Derived1_bar(ptr [[D]]) +; VTABLE-CMP-NEXT: call void @Derived1_bar(ptr [[D]]) ; VTABLE-CMP-NEXT: br label %[[IF_END_ICP:.*]] ; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT]]: ; VTABLE-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived2, i32 64) ; VTABLE-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]] ; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG1]]: -; VTABLE-CMP-NEXT: tail call void @Derived2_bar(ptr [[D]]) +; VTABLE-CMP-NEXT: call void @Derived2_bar(ptr [[D]]) ; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3:.*]] ; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT2]]: ; VTABLE-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Base1, i32 16) @@ -42,12 +42,12 @@ define void @test(ptr %d) { ; VTABLE-CMP-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] ; VTABLE-CMP-NEXT: br i1 [[TMP5]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]] ; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG4]]: -; VTABLE-CMP-NEXT: tail call void @Base1_bar(ptr [[D]]) +; VTABLE-CMP-NEXT: call void @Base1_bar(ptr [[D]]) ; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6:.*]] ; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT5]]: ; VTABLE-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1 ; VTABLE-CMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8 -; VTABLE-CMP-NEXT: tail call void [[TMP6]](ptr [[D]]) +; VTABLE-CMP-NEXT: call void [[TMP6]](ptr [[D]]) ; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6]] ; VTABLE-CMP: [[IF_END_ICP6]]: ; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3]] @@ -67,22 +67,22 @@ define void @test(ptr %d) { ; FUNC-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @Derived1_bar ; FUNC-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]] ; FUNC-CMP: [[IF_TRUE_DIRECT_TARG]]: 
-; FUNC-CMP-NEXT: tail call void @Derived1_bar(ptr [[D]]) +; FUNC-CMP-NEXT: call void @Derived1_bar(ptr [[D]]) ; FUNC-CMP-NEXT: br label %[[IF_END_ICP:.*]] ; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT]]: ; FUNC-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[TMP1]], @Derived2_bar ; FUNC-CMP-NEXT: br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]] ; FUNC-CMP: [[IF_TRUE_DIRECT_TARG1]]: -; FUNC-CMP-NEXT: tail call void @Derived2_bar(ptr [[D]]) +; FUNC-CMP-NEXT: call void @Derived2_bar(ptr [[D]]) ; FUNC-CMP-NEXT: br label %[[IF_END_ICP3:.*]] ; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT2]]: ; FUNC-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @Base1_bar ; FUNC-CMP-NEXT: br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]] ; FUNC-CMP: [[IF_TRUE_DIRECT_TARG4]]: -; FUNC-CMP-NEXT: tail call void @Base1_bar(ptr [[D]]) +; FUNC-CMP-NEXT: call void @Base1_bar(ptr [[D]]) ; FUNC-CMP-NEXT: br label %[[IF_END_ICP6:.*]] ; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT5]]: -; FUNC-CMP-NEXT: tail call void [[TMP1]](ptr [[D]]) +; FUNC-CMP-NEXT: call void [[TMP1]](ptr [[D]]) ; FUNC-CMP-NEXT: br label %[[IF_END_ICP6]] ; FUNC-CMP: [[IF_END_ICP6]]: ; FUNC-CMP-NEXT: br label %[[IF_END_ICP3]] @@ -97,7 +97,7 @@ entry: tail call void @llvm.assume(i1 %0) %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1 %1 = load ptr, ptr %vfn - tail call void %1(ptr %d), !prof !10 + call void %1(ptr %d), !prof !10 ret void } diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll index e82aa9f14788..6d3a6972f688 100644 --- a/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll +++ b/llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll @@ -1,10 +1,10 @@ -; RUN: opt < %s -passes='pgo-icall-prom' -icp-enable-vtable-cmp -S | FileCheck %s --check-prefix=VTABLE +; RUN: opt < %s -passes='pgo-icall-prom' -enable-vtable-profile-use -S | FileCheck %s 
--check-prefix=VTABLE target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -@_ZTV4Base = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0, !type !1 -@_ZTV7Derived = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1, !type !2, !type !3 +@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base10get_ticketEv] }, !type !0, !type !1 +@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived10get_ticketEv] }, !type !0, !type !1, !type !2, !type !3 @.str = private constant [15 x i8] c"out of tickets\00" @@ -109,12 +109,10 @@ lpad: resume { ptr, i32 } %0 } -declare i1 @llvm.type.test(ptr, metadata) #2 +declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) declare i32 @__gxx_personality_v0(...) -attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } - !0 = !{i64 16, !"_ZTS4Base"} !1 = !{i64 16, !"_ZTSM4BaseFivE.virtual"} !2 = !{i64 16, !"_ZTS7Derived"} diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll index 1dc208c30952..d9126aec3d94 100644 --- a/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll +++ b/llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -icp-enable-vtable-cmp -S 2>&1 | FileCheck %s --check-prefixes=VTABLE,REMARK +; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -S 2>&1 | FileCheck %s --check-prefixes=VTABLE,REMARK target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -6,8 +6,8 @@ target triple = 
"x86_64-unknown-linux-gnu" ; REMARK: remark: <unknown>:0:0: Promote indirect call to _ZN7Derived5func1Eii with count 900 out of 1600, compare 1 vtables and sink 1 instruction ; REMARK: remark: <unknown>:0:0: Promote indirect call to _ZN4Base5func1Eii with count 700 out of 700, compare 1 vtables and sink 1 instructions -@_ZTV7Derived = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived5func1Eii] }, align 8, !type !0, !type !1, !type !2, !type !3 -@_ZTV4Base = dso_local constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Eii] }, align 8, !type !0, !type !1 +@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived5func1Eii] }, !type !0, !type !1, !type !2, !type !3 +@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base5func1Eii] }, !type !0, !type !1 define i32 @test_tail_call(ptr %ptr, i32 %a, i32 %b) { ; VTABLE-LABEL: define i32 @test_tail_call( @@ -43,6 +43,7 @@ entry: declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) + define i32 @_ZN7Derived5func1Eii(ptr %this, i32 %a, i32 %b) { entry: %sub = sub nsw i32 %a, %b |