diff options
author | William Junda Huang <williamjhuang@google.com> | 2024-01-23 16:19:45 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-23 16:19:45 -0500 |
commit | 2b8649fbecdc300cde1032d739484690f75a27ba (patch) | |
tree | 21ad93ddb71324ea10974f57c8874d97a7531d46 | |
parent | 3a9ff32354a95305c523ab3b13bf3684854d1327 (diff) |
Added feature in llvm-profdata merge to filter functions from the profile (#78378)
`--function=<regex>` Include functions matching regex in the output
`--no-function=<regex>` Exclude functions matching regex from the output
If both are specified and a function name matches both filters,
`--no-function` takes precedence and the function is excluded
-rw-r--r-- | llvm/docs/CommandGuide/llvm-profdata.rst | 10 | ||||
-rw-r--r-- | llvm/include/llvm/ProfileData/SampleProf.h | 2 | ||||
-rw-r--r-- | llvm/test/tools/llvm-profdata/merge-filter.test | 69 | ||||
-rw-r--r-- | llvm/tools/llvm-profdata/llvm-profdata.cpp | 73 |
4 files changed, 151 insertions, 3 deletions
diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst index f5e3c13ffbc8..acf016a6dbcd 100644 --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -217,6 +217,16 @@ OPTIONS The maximum number of functions in a single temporal profile trace. Longer traces will be truncated. The default value is 1000. +.. option:: --function=<string> + + Only keep functions matching the regex in the output, all others are erased + from the profile. + +.. option:: --no-function=<string> + + Remove functions matching the regex from the profile. If both --function and + --no-function are specified and a function matches both, it is removed. + EXAMPLES ^^^^^^^^ Basic Usage diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 66aaf602d0e1..8ac84d4b933f 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -1330,6 +1330,8 @@ public: } size_t erase(const key_type &Key) { return base_type::erase(Key); } + + iterator erase(iterator It) { return base_type::erase(It); } }; using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>; diff --git a/llvm/test/tools/llvm-profdata/merge-filter.test b/llvm/test/tools/llvm-profdata/merge-filter.test new file mode 100644 index 000000000000..5c47c6a75a7c --- /dev/null +++ b/llvm/test/tools/llvm-profdata/merge-filter.test @@ -0,0 +1,69 @@ +Test llvm-profdata merge with function filters. 
+ +RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext --text --function="_Z3.*" | FileCheck %s --check-prefix=CHECK-FILTER1 +RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext --text --no-function="main" | FileCheck %s --check-prefix=CHECK-FILTER1 +CHECK-FILTER1: _Z3bari:20301:1437 +CHECK-NEXT: 1: 1437 +CHECK-NEXT: _Z3fooi:7711:610 +CHECK-NEXT: 1: 610 +CHECK-NOT: main + +RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext --text --function="_Z3.*" --no-function="fooi$" | FileCheck %s --check-prefix=CHECK-FILTER2 +CHECK-FILTER2: _Z3bari:20301:1437 +CHECK-NEXT: 1: 1437 +CHECK-NOT: main +CHECK-NOT: _Z3fooi + +RUN: llvm-profdata merge --sample --extbinary --use-md5 -output=%t.0.profdata %p/Inputs/sample-profile.proftext +RUN: llvm-profdata merge --sample %t.0.profdata --text --function="_Z3fooi" | FileCheck %s --check-prefix=CHECK-FILTER-MD5 +CHECK-FILTER-MD5: 1228452328526475178:7711:610 +CHECK-NEXT: 1: 610 +CHECK-NOT: 15822663052811949562 +CHECK-NOT: 3727899762981752933 + +RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --function="foo" | FileCheck %s --check-prefix=CHECK-FILTER3 +RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --no-function="main" | FileCheck %s --check-prefix=CHECK-FILTER3 +CHECK-FILTER3: foo +CHECK-NEXT: # Func Hash: +CHECK-NEXT: 10 +CHECK-NEXT: # Num Counters: +CHECK-NEXT: 2 +CHECK-NEXT: # Counter Values: +CHECK-NEXT: 499500 +CHECK-NEXT: 179900 +CHECK-NEXT: +CHECK-NEXT: foo2 +CHECK-NEXT: # Func Hash: +CHECK-NEXT: 10 +CHECK-NEXT: # Num Counters: +CHECK-NEXT: 2 +CHECK-NEXT: # Counter Values: +CHECK-NEXT: 500500 +CHECK-NEXT: 180100 + +RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --function="foo" --no-function="^foo$" | FileCheck %s --check-prefix=CHECK-FILTER4 +CHECK-FILTER4: foo2 +CHECK-NEXT: # Func Hash: +CHECK-NEXT: 10 +CHECK-NEXT: # Num Counters: +CHECK-NEXT: 2 +CHECK-NEXT: # Counter Values: +CHECK-NEXT: 500500 +CHECK-NEXT: 180100 + +RUN: 
llvm-profdata merge --sample %p/Inputs/cs-sample.proftext --text --function="main.*@.*_Z5funcBi" | FileCheck %s --check-prefix=CHECK-FILTER5 +CHECK-FILTER5: [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 +CHECK-NEXT: 0: 15 +CHECK-NEXT: 1: 15 +CHECK-NEXT: 3: 74946 +CHECK-NEXT: 4: 74941 _Z3fibi:82359 +CHECK-NEXT: 10: 23324 +CHECK-NEXT: 11: 23327 _Z3fibi:25228 +CHECK-NEXT: 15: 11 +CHECK-NEXT: !Attributes: 1 +CHECK-NEXT: [main:3.1 @ _Z5funcBi]:120:19 +CHECK-NEXT: 0: 19 +CHECK-NEXT: 1: 19 _Z8funcLeafi:20 +CHECK-NEXT: 3: 12 +CHECK-NEXT: !Attributes: 1 + diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index e6dc81ba1f5b..239aa1c93a2c 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" #include "llvm/Support/VirtualFileSystem.h" @@ -131,9 +132,11 @@ cl::opt<std::string> cl::sub(MergeSubcommand)); cl::opt<std::string> FuncNameFilter( "function", - cl::desc("Details for matching functions. For overlapping CSSPGO, this " - "takes a function name with calling context."), - cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand)); + cl::desc("Only functions matching the filter are shown in the output. 
For " + "overlapping CSSPGO, this takes a function name with calling " + "context."), + cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand), + cl::sub(MergeSubcommand)); // TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to // factor out the common cl::sub in cl::opt constructor for subcommand-specific @@ -243,6 +246,10 @@ cl::opt<uint64_t> TemporalProfMaxTraceLength( cl::sub(MergeSubcommand), cl::desc("The maximum length of a single temporal profile trace " "(default: 10000)")); +cl::opt<std::string> FuncNameNegativeFilter( + "no-function", cl::init(""), + cl::sub(MergeSubcommand), + cl::desc("Exclude functions matching the filter from the output.")); cl::opt<FailureMode> FailMode("failure-mode", cl::init(failIfAnyAreInvalid), @@ -759,6 +766,62 @@ static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) { }); } +static StringRef +getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) { + return Val.first(); +} + +static std::string +getFuncName(const SampleProfileMap::value_type &Val) { + return Val.second.getContext().toString(); +} + +template <typename T> +static void filterFunctions(T &ProfileMap) { + bool hasFilter = !FuncNameFilter.empty(); + bool hasNegativeFilter = !FuncNameNegativeFilter.empty(); + if (!hasFilter && !hasNegativeFilter) + return; + + // If filter starts with '?' it is MSVC mangled name, not a regex. + llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+"); + if (hasFilter && FuncNameFilter[0] == '?' && + ProbablyMSVCMangledName.match(FuncNameFilter)) + FuncNameFilter = llvm::Regex::escape(FuncNameFilter); + if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' 
&& + ProbablyMSVCMangledName.match(FuncNameNegativeFilter)) + FuncNameNegativeFilter = llvm::Regex::escape(FuncNameNegativeFilter); + + size_t Count = ProfileMap.size(); + llvm::Regex Pattern(FuncNameFilter); + llvm::Regex NegativePattern(FuncNameNegativeFilter); + std::string Error; + if (hasFilter && !Pattern.isValid(Error)) + exitWithError(Error); + if (hasNegativeFilter && !NegativePattern.isValid(Error)) + exitWithError(Error); + + // Handle MD5 profile, so it is still able to match using the original name. + std::string MD5Name = std::to_string(llvm::MD5Hash(FuncNameFilter)); + std::string NegativeMD5Name = + std::to_string(llvm::MD5Hash(FuncNameNegativeFilter)); + + for (auto I = ProfileMap.begin(); I != ProfileMap.end();) { + auto Tmp = I++; + const auto &FuncName = getFuncName(*Tmp); + // Negative filter has higher precedence than positive filter. + if ((hasNegativeFilter && + (NegativePattern.match(FuncName) || + (FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) || + (hasFilter && !(Pattern.match(FuncName) || + (FunctionSamples::UseMD5 && MD5Name == FuncName)))) + ProfileMap.erase(Tmp); + } + + llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions " + << "in the original profile are filtered.\n"; +} + static void writeInstrProfile(StringRef OutputFilename, ProfileFormat OutputFormat, InstrProfWriter &Writer) { @@ -878,6 +941,8 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs, (NumErrors > 0 && FailMode == failIfAnyAreInvalid)) exitWithError("no profile can be merged"); + filterFunctions(Contexts[0]->Writer.getProfileData()); + writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer); } @@ -1459,6 +1524,8 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs, ProfileIsCS = FunctionSamples::ProfileIsCS = false; } + filterFunctions(ProfileMap); + auto WriterOrErr = SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]); if (std::error_code EC = WriterOrErr.getError()) |