summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMilian Wolff <milian.wolff@kdab.com>2020-06-14 13:35:54 +0200
committerMilian Wolff <milian.wolff@kdab.com>2020-09-11 10:31:00 +0000
commit8b458e6ef86aefd6e92a1359eb797ca8d55257cd (patch)
tree121694ddf48782622d07734eec1455c00793bc28
parent38829bdee03b953e1250b5a675c96790843e9666 (diff)
Reduce calls to dwarf_getscopes to improve performance
Implement an alternative approach to resolving inline frames and cache subprogram die names. The problem with dwarf_getscopes and dwarf_getscopes_die is that they often have to traverse a large part of the DIE tree within a CU DIE to find their result. For larger DSOs this repeated tree walking can consume a lot of time. The idea behind this patch is the following: We find (and cache) the Dwarf_Dies with DW_TAG_subprogram and then look for inline frames in that subtree directly. Additionally, we cache the scope names within a CU DIE more aggressively. I hope that in the future this area can be improved even further, as the remaining calls to dwarf_getscopes_die still make up a large fraction of the overall analysis cost. The following shows the performance impact of this patch for a perf.data file with about 6M samples. Many frames in the callstacks point to a self-compiled libclang.so with debug symbols. That library alone is roughly 600MB in size. This makes finding inline frames quite slow. 
Before: ``` 80.159,75 msec task-clock # 0,984 CPUs utilized 4.075 context-switches # 0,051 K/sec 1 cpu-migrations # 0,000 K/sec 152.257 page-faults # 0,002 M/sec 346.071.892.881 cycles # 4,317 GHz (83,33%) 1.940.060.936 stalled-cycles-frontend # 0,56% frontend cycles idle (83,33%) 38.399.679.774 stalled-cycles-backend # 11,10% backend cycles idle (83,34%) 999.298.133.335 instructions # 2,89 insn per cycle # 0,04 stalled cycles per insn (83,31%) 239.561.868.424 branches # 2988,556 M/sec (83,34%) 1.163.589.915 branch-misses # 0,49% of all branches (83,34%) 81,497496973 seconds time elapsed 79,554970000 seconds user 0,404933000 seconds sys ``` After: ``` 15.558,09 msec task-clock # 1,000 CPUs utilized 99 context-switches # 0,006 K/sec 1 cpu-migrations # 0,000 K/sec 151.446 page-faults # 0,010 M/sec 67.961.461.389 cycles # 4,368 GHz (83,32%) 759.299.629 stalled-cycles-frontend # 1,12% frontend cycles idle (83,32%) 7.369.116.441 stalled-cycles-backend # 10,84% backend cycles idle (83,34%) 187.648.727.850 instructions # 2,76 insn per cycle # 0,04 stalled cycles per insn (83,34%) 45.231.315.052 branches # 2907,254 M/sec (83,34%) 200.377.846 branch-misses # 0,44% of all branches (83,33%) 15,560370834 seconds time elapsed 15,230305000 seconds user 0,285776000 seconds sys ``` Which means we are now roughly 5x faster than before, which is a pretty significant gain. Relates-To: https://github.com/KDAB/hotspot/issues/192 Change-Id: I0669cc3aad886b22165eaf1d0836a56e5183898d Reviewed-by: Ulf Hermann <ulf.hermann@qt.io>
-rw-r--r--app/CMakeLists.txt2
-rw-r--r--app/app.pro7
-rw-r--r--app/app.qbs3
-rw-r--r--app/perfdwarfdiecache.cpp352
-rw-r--r--app/perfdwarfdiecache.h132
-rw-r--r--app/perfeucompat.h42
-rw-r--r--app/perfsymboltable.cpp292
-rw-r--r--app/perfsymboltable.h14
8 files changed, 597 insertions, 247 deletions
diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt
index 80e4246..746583e 100644
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@@ -20,6 +20,8 @@ add_qtc_library(perfparser_lib STATIC
perfelfmap.cpp perfelfmap.h
perfkallsyms.cpp perfkallsyms.h
perftracingdata.cpp perftracingdata.h
+ perfdwarfdiecache.cpp perfdwarfdiecache.h
+ perfeucompat.h
)
if (Zstd_FOUND)
diff --git a/app/app.pro b/app/app.pro
index 745cd20..24b3ec1 100644
--- a/app/app.pro
+++ b/app/app.pro
@@ -30,7 +30,8 @@ SOURCES += main.cpp \
perfsymboltable.cpp \
perfelfmap.cpp \
perfkallsyms.cpp \
- perftracingdata.cpp
+ perftracingdata.cpp \
+ perfdwarfdiecache.cpp
HEADERS += \
perfaddresscache.h \
@@ -45,6 +46,8 @@ HEADERS += \
perfsymboltable.h \
perfelfmap.h \
perfkallsyms.h \
- perftracingdata.h
+ perftracingdata.h \
+ perfdwarfdiecache.h \
+ perfeucompat.h
OTHER_FILES += app.qbs
diff --git a/app/app.qbs b/app/app.qbs
index 646e09a..71eff96 100644
--- a/app/app.qbs
+++ b/app/app.qbs
@@ -40,5 +40,8 @@ QtcTool {
"perfkallsyms.h",
"perftracingdata.cpp",
"perftracingdata.h",
+ "perfdwarfdiecache.cpp",
+ "perfdwarfdiecache.h",
+ "perfeucompat.h"
]
}
diff --git a/app/perfdwarfdiecache.cpp b/app/perfdwarfdiecache.cpp
new file mode 100644
index 0000000..4823646
--- /dev/null
+++ b/app/perfdwarfdiecache.cpp
@@ -0,0 +1,352 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Milian Wolff <milian.wolff@kdab.com>
+** Contact: http://www.qt.io/licensing/
+**
+** This file is part of the Qt Enterprise Perf Profiler Add-on.
+**
+** GNU General Public License Usage
+** This file may be used under the terms of the GNU General Public License
+** version 3 as published by the Free Software Foundation and appearing in
+** the file LICENSE.GPLv3 included in the packaging of this file. Please
+** review the following information to ensure the GNU General Public License
+** requirements will be met: https://www.gnu.org/licenses/gpl.html.
+**
+** If you have questions regarding the use of this file, please use
+** contact form at http://www.qt.io/contact-us
+**
+****************************************************************************/
+
+#include "perfdwarfdiecache.h"
+#include "perfeucompat.h"
+
+#include <dwarf.h>
+
+#ifdef HAVE_RUSTC_DEMANGLE
+#include <rustc_demangle.h>
+#endif
+
+namespace {
+enum class WalkResult // verdict a walkDieTree() visitor returns for the DIE it was handed
+{
+ Recurse, ///< continue the walk with the children of this DIE
+ Skip, ///< do not descend into this DIE; the walk goes on with its siblings
+ Return ///< abort the walk; propagated up through every recursion level
+};
+/// Depth-first visit of @p die and its descendants.
+/// @p callback is invoked for every visited DIE and steers the walk through its WalkResult.
+/// @return the callback's verdict for @p die when that is not Recurse, WalkResult::Return
+///         when a descendant aborted the walk, WalkResult::Skip otherwise
+template<typename Callback>
+WalkResult walkDieTree(const Callback &callback, Dwarf_Die *die)
+{
+    const auto verdict = callback(die);
+    if (verdict != WalkResult::Recurse)
+        return verdict;
+
+    Dwarf_Die current;
+    if (dwarf_child(die, &current) != 0)
+        return WalkResult::Skip; // nothing below this DIE
+
+    // visit the first child and then each of its siblings in turn
+    for (;;) {
+        if (walkDieTree(callback, &current) == WalkResult::Return)
+            return WalkResult::Return;
+
+        Dwarf_Die next;
+        if (dwarf_siblingof(&current, &next) != 0)
+            break;
+        current = next;
+    }
+    return WalkResult::Skip;
+}
+
+/// Feeds every address range that dwarf_ranges reports for @p die to @p callback.
+/// The iteration stops early as soon as @p callback returns false.
+template<typename Callback>
+void walkRanges(const Callback &callback, Dwarf_Die *die)
+{
+    Dwarf_Addr base = 0;
+    Dwarf_Addr begin = 0;
+    Dwarf_Addr end = 0;
+    for (ptrdiff_t cursor = dwarf_ranges(die, 0, &base, &begin, &end); cursor > 0;
+         cursor = dwarf_ranges(die, cursor, &base, &begin, &end)) {
+        if (!callback(DwarfRange{begin, end}))
+            break;
+    }
+}
+
+// see libdw_visit_scopes.c in elfutils
+/// @return true when the tag of @p die is one of the kinds tested below, false for any other tag
+bool mayHaveScopes(Dwarf_Die *die)
+{
+    const int tag = dwarf_tag(die);
+
+    // DIEs with addresses we can try to match
+    const bool hasAddresses = tag == DW_TAG_compile_unit
+        || tag == DW_TAG_module
+        || tag == DW_TAG_lexical_block
+        || tag == DW_TAG_with_stmt
+        || tag == DW_TAG_catch_block
+        || tag == DW_TAG_try_block
+        || tag == DW_TAG_entry_point
+        || tag == DW_TAG_inlined_subroutine
+        || tag == DW_TAG_subprogram;
+    if (hasAddresses)
+        return true;
+
+    // DIEs without addresses that can own DIEs with addresses;
+    // everything else gives us no reason to descend
+    return tag == DW_TAG_namespace
+        || tag == DW_TAG_class_type
+        || tag == DW_TAG_structure_type;
+}
+
+/// @return true when one of the ranges of @p die contains @p address
+bool dieContainsAddress(Dwarf_Die *die, Dwarf_Addr address)
+{
+    bool found = false;
+    const auto visitor = [&found, address](DwarfRange range) {
+        found = range.contains(address);
+        // returning false ends the range walk once a match showed up
+        return !found;
+    };
+    walkRanges(visitor, die);
+    return found;
+}
+}
+
+/// @return the linkage name of @p die, or nullptr when it has neither a
+///         DW_AT_MIPS_linkage_name nor a DW_AT_linkage_name attribute
+const char *linkageName(Dwarf_Die *die)
+{
+    Dwarf_Attribute attrMem;
+
+    // DW_AT_MIPS_linkage_name is tried first, DW_AT_linkage_name is the fallback
+    auto *found = dwarf_attr_integrate(die, DW_AT_MIPS_linkage_name, &attrMem);
+    if (found == nullptr)
+        found = dwarf_attr_integrate(die, DW_AT_linkage_name, &attrMem);
+
+    if (found == nullptr)
+        return nullptr;
+    return dwarf_formstring(found);
+}
+
+/// @return the DIE referenced by the DW_AT_specification attribute of @p die, stored in
+///         @p dieMem, or nullptr when @p die has no such attribute
+Dwarf_Die *specificationDie(Dwarf_Die *die, Dwarf_Die *dieMem)
+{
+    Dwarf_Attribute attrMem;
+    const bool hasSpecification = dwarf_attr_integrate(die, DW_AT_specification, &attrMem) != nullptr;
+    if (!hasSpecification)
+        return nullptr;
+
+    return dwarf_formref_die(&attrMem, dieMem);
+}
+
+/// prepend the names of all scopes that reference the @p die to @p name
+///
+/// @p name  in/out: the unqualified name on entry, the scope-qualified name on return
+/// @p die   DIE whose enclosing scopes are looked up via dwarf_getscopes_die
+/// @p cache scope names keyed by DIE offset; consulted to cut the scope walk short and
+///          extended with every scope that got resolved by this call
+void prependScopeNames(QByteArray &name, Dwarf_Die *die, QHash<Dwarf_Off, QByteArray> &cache)
+{
+    Dwarf_Die dieMem;
+    Dwarf_Die *scopes = nullptr;
+    auto nscopes = dwarf_getscopes_die(die, &scopes);
+
+    // a pending cache entry: once the full name is known, the value stored for `offset`
+    // is the final name without its last `trailing` bytes
+    struct ScopesToCache
+    {
+        Dwarf_Off offset;
+        int trailing;
+    };
+    QVector<ScopesToCache> cacheOps;
+
+    // skip scope for the die itself at the start and the compile unit DIE at end
+    for (int i = 1; i < nscopes - 1; ++i) {
+        auto scope = scopes + i;
+
+        const auto scopeOffset = dwarf_dieoffset(scope);
+
+        auto it = cache.find(scopeOffset);
+        if (it != cache.end()) {
+            // The linkage-name and scope-name branches below cache a scope *without* the
+            // "::" that separates it from the nested name, so the separator has to be
+            // added back here - otherwise we end up with names like "ns::Classmember".
+            // Entries written by the specification branch already end in "::" and empty
+            // entries carry no scope at all; neither must get another separator.
+            if (!it->isEmpty() && !it->endsWith("::"))
+                name.prepend("::");
+            name.prepend(*it);
+            // we can stop, cached names are always fully qualified
+            break;
+        }
+
+        if (auto scopeLinkageName = linkageName(scope)) {
+            // prepend the fully qualified linkage name
+            name.prepend("::");
+            cacheOps.append({scopeOffset, name.size()});
+            // we have to demangle the scope linkage name, otherwise we get a
+            // mish-mash of mangled and non-mangled names
+            name.prepend(demangle(scopeLinkageName));
+            // we can stop now, the scope is fully qualified
+            break;
+        }
+
+        if (auto scopeName = dwarf_diename(scope)) {
+            // prepend this scope's name, e.g. the class or namespace name
+            name.prepend("::");
+            cacheOps.append({scopeOffset, name.size()});
+            name.prepend(scopeName);
+        }
+
+        if (auto specification = specificationDie(scope, &dieMem)) {
+            // `scope` points into `scopes`, it must not be touched after this free
+            eu_compat_free(scopes);
+            scopes = nullptr;
+            // NOTE(review): these two entries count everything prepended so far as trailing,
+            // i.e. they cache only what the recursion below adds in front - confirm intended
+            cacheOps.append({scopeOffset, name.size()});
+            cacheOps.append({dwarf_dieoffset(specification), name.size()});
+            // follow the scope's specification DIE instead
+            prependScopeNames(name, specification, cache);
+            break;
+        }
+    }
+
+    // the name is complete now, cut the cached prefixes out of it
+    for (const auto &cacheOp : cacheOps)
+        cache[cacheOp.offset] = name.mid(0, name.size() - cacheOp.trailing);
+
+    eu_compat_free(scopes);
+}
+
+/// Two Dwarf_Die values compare equal when their addr, cu and abbrev members all match.
+bool operator==(const Dwarf_Die &lhs, const Dwarf_Die &rhs)
+{
+    if (lhs.addr != rhs.addr)
+        return false;
+    if (lhs.cu != rhs.cu)
+        return false;
+    return lhs.abbrev == rhs.abbrev;
+}
+
+/// @return the linkage name of @p die when it has one, otherwise its DIE name with the
+///         names of the enclosing scopes prepended
+/// @p cache scope name cache that is handed on to prependScopeNames
+QByteArray qualifiedDieName(Dwarf_Die *die, QHash<Dwarf_Off, QByteArray> &cache)
+{
+    // linkage names are fully qualified, meaning we can stop early then
+    const char *linkage = linkageName(die);
+    if (linkage != nullptr)
+        return linkage;
+
+    // otherwise do a more complex lookup that includes namespaces and other context information
+    // this is important for inlined subroutines such as lambdas or std:: algorithms
+    QByteArray qualified = dwarf_diename(die);
+
+    // use the specification DIE which is within the DW_TAG_namespace
+    Dwarf_Die specificationMem;
+    Dwarf_Die *scopeAnchor = specificationDie(die, &specificationMem);
+    if (scopeAnchor == nullptr)
+        scopeAnchor = die;
+
+    prependScopeNames(qualified, scopeAnchor, cache);
+    return qualified;
+}
+
+/// @return the demangled form of @p mangledName, or @p mangledName itself when it is shorter
+///         than three bytes or none of the demanglers accepts it
+QByteArray demangle(const QByteArray &mangledName)
+{
+    if (mangledName.length() < 3)
+        return mangledName;
+
+    // output buffer that is kept alive and reused across calls
+    static size_t demangleBufferLength = 1024;
+    static char *demangleBuffer = reinterpret_cast<char *>(eu_compat_malloc(demangleBufferLength));
+
+#ifdef HAVE_RUSTC_DEMANGLE
+    if (rustc_demangle(mangledName.constData(), demangleBuffer, demangleBufferLength))
+        return demangleBuffer;
+#endif
+
+    // Require GNU v3 ABI by the "_Z" prefix.
+    const bool hasGnuV3Prefix = mangledName[0] == '_' && mangledName[1] == 'Z';
+    if (!hasGnuV3Prefix)
+        return mangledName;
+
+    int status = -1;
+    char *demangled = eu_compat_demangle(mangledName.constData(), demangleBuffer, &demangleBufferLength,
+                                         &status);
+    if (status != 0)
+        return mangledName;
+
+    // the pointer handed back by the demangler becomes the buffer for the next call
+    demangleBuffer = demangled;
+    return demangleBuffer;
+}
+
+/// Collects the DW_TAG_inlined_subroutine DIEs below @p subprogram whose ranges contain
+/// @p offset, in the order the depth-first walk encounters them.
+QVector<Dwarf_Die> findInlineScopes(Dwarf_Die *subprogram, Dwarf_Addr offset)
+{
+    QVector<Dwarf_Die> inlined;
+    const auto collect = [&inlined, offset](Dwarf_Die *die) {
+        if (dwarf_tag(die) == DW_TAG_inlined_subroutine) {
+            // an inlined subroutine that does not cover the offset is not descended into
+            if (!dieContainsAddress(die, offset))
+                return WalkResult::Skip;
+            inlined.append(*die);
+        }
+        // matching inlined subroutines and all other DIEs are searched further
+        return WalkResult::Recurse;
+    };
+    walkDieTree(collect, subprogram);
+    return inlined;
+}
+
+/// Stores a copy of @p die and records all of its address ranges as reported by walkRanges.
+SubProgramDie::SubProgramDie(Dwarf_Die die)
+    : m_ranges{die, {}}
+{
+    auto &collected = m_ranges.ranges;
+    const auto record = [&collected](DwarfRange range) {
+        collected.append(range);
+        return true; // never stop early, every range is wanted
+    };
+    walkRanges(record, &die);
+}
+
+SubProgramDie::~SubProgramDie() = default;
+
+/// Stores a copy of @p cudie and records its address ranges, each shifted by @p bias.
+CuDieRangeMapping::CuDieRangeMapping(Dwarf_Die cudie, Dwarf_Addr bias)
+    : m_bias{bias}
+    , m_cuDieRanges{cudie, {}}
+{
+    const auto recordBiased = [this](DwarfRange range) {
+        const DwarfRange shifted{range.low + m_bias, range.high + m_bias};
+        m_cuDieRanges.ranges.append(shifted);
+        return true; // never stop early, every range is wanted
+    };
+    walkRanges(recordBiased, &cudie);
+}
+
+CuDieRangeMapping::~CuDieRangeMapping() = default;
+
+/// @return the first cached sub program whose ranges contain @p offset, nullptr if there is none
+/// The sub program list is populated via addSubprograms() whenever it is still empty.
+SubProgramDie *CuDieRangeMapping::findSubprogramDie(Dwarf_Addr offset)
+{
+    if (m_subPrograms.isEmpty())
+        addSubprograms();
+
+    for (auto &program : m_subPrograms) {
+        if (program.contains(offset))
+            return &program;
+    }
+    return nullptr;
+}
+
+/// Walks the CU DIE and appends every DW_TAG_subprogram DIE that has at least one
+/// address range to m_subPrograms. Sub programs themselves are not descended into.
+void CuDieRangeMapping::addSubprograms()
+{
+    const auto visit = [this](Dwarf_Die *die) {
+        if (!mayHaveScopes(die))
+            return WalkResult::Skip;
+
+        if (dwarf_tag(die) != DW_TAG_subprogram)
+            return WalkResult::Recurse;
+
+        SubProgramDie program(*die);
+        if (!program.isEmpty())
+            m_subPrograms.append(program);
+        return WalkResult::Skip;
+    };
+    walkDieTree(visit, cudie());
+}
+
+/// @return the fully qualified, demangled name of @p die
+/// The name is computed on first use and cached by DIE offset; an empty cached
+/// name is treated like a cache miss and computed again.
+QByteArray CuDieRangeMapping::dieName(Dwarf_Die *die)
+{
+    const auto offset = dwarf_dieoffset(die);
+
+    const auto cached = m_dieNameCache.constFind(offset);
+    if (cached != m_dieNameCache.constEnd() && !cached->isEmpty())
+        return *cached;
+
+    // qualifiedDieName() inserts scope names into m_dieNameCache itself, so neither a
+    // reference nor an iterator into the hash may be held across this call
+    QByteArray name = demangle(qualifiedDieName(die, m_dieNameCache));
+    m_dieNameCache.insert(offset, name);
+    return name;
+}
+
+/// Iterates over all CU DIEs of @p mod and keeps a CuDieRangeMapping for each one that
+/// has at least one address range. A null @p mod leaves the cache empty.
+PerfDwarfDieCache::PerfDwarfDieCache(Dwfl_Module *mod)
+{
+    if (mod == nullptr)
+        return;
+
+    Dwarf_Addr bias = 0;
+    for (Dwarf_Die *die = dwfl_module_nextcu(mod, nullptr, &bias); die != nullptr;
+         die = dwfl_module_nextcu(mod, die, &bias)) {
+        CuDieRangeMapping mapping(*die, bias);
+        if (mapping.isEmpty())
+            continue;
+        m_cuDieRanges.push_back(mapping);
+    }
+}
+
+PerfDwarfDieCache::~PerfDwarfDieCache() = default;
+
+/// @return the first cached CU mapping whose ranges contain @p addr, nullptr if there is none
+CuDieRangeMapping *PerfDwarfDieCache::findCuDie(Dwarf_Addr addr)
+{
+    for (auto &mapping : m_cuDieRanges) {
+        if (mapping.contains(addr))
+            return &mapping;
+    }
+    return nullptr;
+}
diff --git a/app/perfdwarfdiecache.h b/app/perfdwarfdiecache.h
new file mode 100644
index 0000000..26183b1
--- /dev/null
+++ b/app/perfdwarfdiecache.h
@@ -0,0 +1,132 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Milian Wolff <milian.wolff@kdab.com>
+** Contact: http://www.qt.io/licensing/
+**
+** This file is part of the Qt Enterprise Perf Profiler Add-on.
+**
+** GNU General Public License Usage
+** This file may be used under the terms of the GNU General Public License
+** version 3 as published by the Free Software Foundation and appearing in
+** the file LICENSE.GPLv3 included in the packaging of this file. Please
+** review the following information to ensure the GNU General Public License
+** requirements will be met: https://www.gnu.org/licenses/gpl.html.
+**
+** If you have questions regarding the use of this file, please use
+** contact form at http://www.qt.io/contact-us
+**
+****************************************************************************/
+
+#pragma once
+
+#include <libdwfl.h>
+
+#include <QVector>
+#include <QHash>
+
+#include <algorithm>
+
+/// @return the demangled symbol name
+QByteArray demangle(const QByteArray &mangledName);
+
+/// Address interval that includes low and excludes high.
+struct DwarfRange
+{
+    Dwarf_Addr low;
+    Dwarf_Addr high;
+
+    /// @return true when @p addr is at or above low and strictly below high
+    bool contains(Dwarf_Addr addr) const
+    {
+        if (addr < low)
+            return false;
+        return addr < high;
+    }
+};
+
+/// cache of dwarf ranges for a given Dwarf_Die
+struct DieRanges
+{
+    Dwarf_Die die;
+    QVector<DwarfRange> ranges;
+
+    /// @return true when at least one of the cached ranges contains @p addr
+    bool contains(Dwarf_Addr addr) const
+    {
+        for (const auto &candidate : ranges) {
+            if (candidate.contains(addr))
+                return true;
+        }
+        return false;
+    }
+};
+
+/// cache of a sub program DIE and its address ranges
+/// (names are not stored here, see CuDieRangeMapping::dieName)
+class SubProgramDie
+{
+public:
+ SubProgramDie() = default;
+ /// copies @p die and records all of its ranges
+ SubProgramDie(Dwarf_Die die);
+ ~SubProgramDie();
+
+ /// @return true when no range was recorded for the DIE
+ bool isEmpty() const { return m_ranges.ranges.isEmpty(); }
+ /// @p offset a bias-corrected offset
+ bool contains(Dwarf_Addr offset) const { return m_ranges.contains(offset); }
+ /// @return pointer to the DIE copy owned by this object
+ Dwarf_Die *die() { return &m_ranges.die; }
+
+private:
+ DieRanges m_ranges;
+};
+
+/// cache of dwarf ranges for a CU DIE and child sub programs
+class CuDieRangeMapping
+{
+public:
+ CuDieRangeMapping() = default;
+ /// copies @p cudie and records its ranges, each shifted by @p bias
+ CuDieRangeMapping(Dwarf_Die cudie, Dwarf_Addr bias);
+ ~CuDieRangeMapping();
+
+ /// @return true when no range was recorded for the CU DIE
+ bool isEmpty() const { return m_cuDieRanges.ranges.isEmpty(); }
+ /// @p addr is matched against the bias-shifted ranges of the CU DIE
+ bool contains(Dwarf_Addr addr) const { return m_cuDieRanges.contains(addr); }
+ /// @return the bias that was passed to the constructor
+ Dwarf_Addr bias() { return m_bias; }
+ /// @return pointer to the CU DIE copy owned by this object
+ Dwarf_Die *cudie() { return &m_cuDieRanges.die; }
+
+ /// On first call this will visit the CU DIE to cache all subprograms
+ /// @return the DW_TAG_subprogram DIE that contains @p offset
+ /// @p offset a bias-corrected address to find a subprogram for
+ SubProgramDie *findSubprogramDie(Dwarf_Addr offset);
+
+ /// @return a fully qualified, demangled symbol name for @p die
+ QByteArray dieName(Dwarf_Die *die);
+
+private:
+ /// walks the CU DIE and fills m_subPrograms with the DW_TAG_subprogram DIEs that have ranges
+ void addSubprograms();
+
+ Dwarf_Addr m_bias = 0;
+ DieRanges m_cuDieRanges;
+ QVector<SubProgramDie> m_subPrograms;
+ /// names keyed by DIE offset; written by dieName() and by the scope name lookup it triggers
+ QHash<Dwarf_Off, QByteArray> m_dieNameCache;
+};
+
+/**
+ * @return all DW_TAG_inlined_subroutine DIEs that contain @p offset
+ * @p subprogram DIE sub tree that should be traversed to look for inlined scopes
+ * @p offset bias-corrected address that is checked against the dwarf ranges of the DIEs
+ */
+QVector<Dwarf_Die> findInlineScopes(Dwarf_Die *subprogram, Dwarf_Addr offset);
+
+/**
+ * This cache makes it easily possible to find a CU DIE (i.e. Compilation Unit Debugging Information Entry)
+ * based on an absolute address, i.e. one that is not bias-corrected.
+ *
+ * On construction every CU DIE of the given module is visited and its address ranges are
+ * stored with the bias already added, so that lookups can use absolute addresses directly.
+ */
+class PerfDwarfDieCache
+{
+public:
+ PerfDwarfDieCache(Dwfl_Module *mod = nullptr);
+ ~PerfDwarfDieCache();
+
+ /// @p addr absolute address, not bias-corrected
+ /// @return the mapping of the CU DIE that contains @p addr, or nullptr if there is none
+ CuDieRangeMapping *findCuDie(Dwarf_Addr addr);
+
+public:
+ /// one entry per CU DIE of the module that has at least one address range
+ QVector<CuDieRangeMapping> m_cuDieRanges;
+};
+// All cache types only hold plain libdw data and Qt containers, so they can be relocated
+// by QVector without running copy constructors.
+QT_BEGIN_NAMESPACE
+Q_DECLARE_TYPEINFO(DwarfRange, Q_MOVABLE_TYPE);
+Q_DECLARE_TYPEINFO(PerfDwarfDieCache, Q_MOVABLE_TYPE);
+Q_DECLARE_TYPEINFO(DieRanges, Q_MOVABLE_TYPE);
+// SubProgramDie merely wraps a DieRanges and is stored by value in QVector<SubProgramDie>
+Q_DECLARE_TYPEINFO(SubProgramDie, Q_MOVABLE_TYPE);
+Q_DECLARE_TYPEINFO(CuDieRangeMapping, Q_MOVABLE_TYPE);
+QT_END_NAMESPACE
diff --git a/app/perfeucompat.h b/app/perfeucompat.h
new file mode 100644
index 0000000..b537da6
--- /dev/null
+++ b/app/perfeucompat.h
@@ -0,0 +1,42 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of Qt Creator.
+**
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+****************************************************************************/
+
+#pragma once
+
+// Q_OS_WIN is tested below, make sure it is defined no matter what got included before
+#include <QtGlobal>
+
+#include <fcntl.h>
+
+#ifdef Q_OS_WIN
+#include <libeu_compat.h>
+#else
+#include <cxxabi.h>
+// declarations for the functions the eu_compat_* macros below expand to
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#define eu_compat_open open
+#define eu_compat_close close
+#define eu_compat_malloc malloc
+#define eu_compat_free free
+#define eu_compat_demangle abi::__cxa_demangle
+#define eu_compat_strdup strdup
+#define O_BINARY 0
+#endif
diff --git a/app/perfsymboltable.cpp b/app/perfsymboltable.cpp
index 240b46a..64b0856 100644
--- a/app/perfsymboltable.cpp
+++ b/app/perfsymboltable.cpp
@@ -25,8 +25,8 @@
#include "perfsymboltable.h"
#include "perfunwind.h"
-
-#include <dwarf.h>
+#include "perfdwarfdiecache.h"
+#include "perfeucompat.h"
#include <QDebug>
#include <QDir>
@@ -34,25 +34,8 @@
#include <tuple>
#include <cstring>
-#include <fcntl.h>
-
-#ifdef Q_OS_WIN
-#include <libeu_compat.h>
-#else
-#include <cxxabi.h>
-#include <unistd.h>
-#define eu_compat_open open
-#define eu_compat_close close
-#define eu_compat_malloc malloc
-#define eu_compat_free free
-#define eu_compat_demangle abi::__cxa_demangle
-#define eu_compat_strdup strdup
-#define O_BINARY 0
-#endif
-
-#ifdef HAVE_RUSTC_DEMANGLE
-#include <rustc_demangle.h>
-#endif
+
+#include <dwarf.h>
PerfSymbolTable::PerfSymbolTable(qint32 pid, Dwfl_Callbacks *callbacks, PerfUnwind *parent) :
m_perfMapFile(QDir::tempPath() + QDir::separator()
@@ -333,111 +316,8 @@ void PerfSymbolTable::registerElf(const PerfRecordMmap &mmap, const QByteArray &
clearCache();
}
-static QByteArray demangle(const QByteArray &mangledName)
-{
- if (mangledName.length() < 3) {
- return mangledName;
- } else {
- static size_t demangleBufferLength = 1024;
- static char *demangleBuffer = reinterpret_cast<char *>(eu_compat_malloc(demangleBufferLength));
-
-#ifdef HAVE_RUSTC_DEMANGLE
- if (rustc_demangle(mangledName.constData(), demangleBuffer, demangleBufferLength))
- return demangleBuffer;
-#endif
-
- // Require GNU v3 ABI by the "_Z" prefix.
- if (mangledName[0] == '_' && mangledName[1] == 'Z') {
- int status = -1;
- char *dsymname = eu_compat_demangle(mangledName.constData(), demangleBuffer, &demangleBufferLength,
- &status);
- if (status == 0)
- return demangleBuffer = dsymname;
- }
- }
- return mangledName;
-}
-
-/// @return the fully qualified linkage name
-static const char *linkageName(Dwarf_Die *die)
-{
- Dwarf_Attribute attr;
- Dwarf_Attribute *result = dwarf_attr_integrate(die, DW_AT_MIPS_linkage_name, &attr);
- if (!result)
- result = dwarf_attr_integrate(die, DW_AT_linkage_name, &attr);
-
- return result ? dwarf_formstring(result) : nullptr;
-}
-
-/// @return the referenced DW_AT_specification DIE
-/// inlined subroutines of e.g. std:: algorithms aren't namespaced, but their DW_AT_specification DIE is
-static Dwarf_Die *specificationDie(Dwarf_Die *die, Dwarf_Die *dieMem)
-{
- Dwarf_Attribute attr;
- if (dwarf_attr_integrate(die, DW_AT_specification, &attr))
- return dwarf_formref_die(&attr, dieMem);
- return nullptr;
-}
-
-/// prepend the names of all scopes that reference the @p die to @p name
-static void prependScopeNames(QByteArray &name, Dwarf_Die *die)
-{
- Dwarf_Die dieMem;
- Dwarf_Die *scopes = nullptr;
- auto nscopes = dwarf_getscopes_die(die, &scopes);
-
- // skip scope for the die itself at the start and the compile unit DIE at end
- for (int i = 1; i < nscopes - 1; ++i) {
- auto scope = scopes + i;
-
- if (auto scopeLinkageName = linkageName(scope)) {
- // prepend the fully qualified linkage name
- name.prepend("::");
- // we have to demangle the scope linkage name, otherwise we get a
- // mish-mash of mangled and non-mangled names
- name.prepend(demangle(scopeLinkageName));
- // we can stop now, the scope is fully qualified
- break;
- }
-
- if (auto scopeName = dwarf_diename(scope)) {
- // prepend this scope's name, e.g. the class or namespace name
- name.prepend("::");
- name.prepend(scopeName);
- }
-
- if (auto specification = specificationDie(scope, &dieMem)) {
- eu_compat_free(scopes);
- scopes = nullptr;
- // follow the scope's specification DIE instead
- prependScopeNames(name, specification);
- break;
- }
- }
-
- eu_compat_free(scopes);
-}
-
-static QByteArray dieName(Dwarf_Die *die)
-{
- // linkage names are fully qualified, meaning we can stop early then
- if (auto name = linkageName(die))
- return name;
-
- // otherwise do a more complex lookup that includes namespaces and other context information
- // this is important for inlined subroutines such as lambdas or std:: algorithms
- QByteArray name = dwarf_diename(die);
-
- // use the specification DIE which is within the DW_TAG_namespace
- Dwarf_Die dieMem;
- if (auto specification = specificationDie(die, &dieMem))
- die = specification;
-
- prependScopeNames(name, die);
- return name;
-}
-
-int PerfSymbolTable::insertSubprogram(Dwarf_Die *top, Dwarf_Addr entry, qint32 binaryId, qint32 binaryPathId,
+int PerfSymbolTable::insertSubprogram(CuDieRangeMapping *cudie, Dwarf_Die *top, Dwarf_Addr entry,
+ qint32 binaryId, qint32 binaryPathId,
qint32 inlineCallLocationId, bool isKernel)
{
int line = 0;
@@ -449,14 +329,14 @@ int PerfSymbolTable::insertSubprogram(Dwarf_Die *top, Dwarf_Addr entry, qint32 b
qint32 fileId = m_unwind->resolveString(file);
int locationId = m_unwind->resolveLocation(PerfUnwind::Location(entry, fileId, m_pid, line,
column, inlineCallLocationId));
- qint32 symId = m_unwind->resolveString(demangle(dieName(top)));
+ qint32 symId = m_unwind->resolveString(cudie->dieName(top));
m_unwind->resolveSymbol(locationId, PerfUnwind::Symbol(symId, binaryId, binaryPathId, isKernel));
return locationId;
}
-int PerfSymbolTable::parseDie(Dwarf_Die *top, qint32 binaryId, qint32 binaryPathId, bool isKernel,
- Dwarf_Files *files, Dwarf_Addr entry, qint32 parentLocationId)
+int PerfSymbolTable::parseDie(CuDieRangeMapping *cudie, Dwarf_Die *top, qint32 binaryId, qint32 binaryPathId,
+ bool isKernel, Dwarf_Files *files, Dwarf_Addr entry, qint32 parentLocationId)
{
int tag = dwarf_tag(top);
switch (tag) {
@@ -479,38 +359,35 @@ int PerfSymbolTable::parseDie(Dwarf_Die *top, qint32 binaryId, qint32 binaryPath
location.parentLocationId = parentLocationId;
int callLocationId = m_unwind->resolveLocation(location);
- return insertSubprogram(top, entry, binaryId, binaryPathId, callLocationId, isKernel);
+ return insertSubprogram(cudie, top, entry, binaryId, binaryPathId, callLocationId, isKernel);
}
case DW_TAG_subprogram:
- return insertSubprogram(top, entry, binaryId, binaryPathId, -1, isKernel);
+ return insertSubprogram(cudie, top, entry, binaryId, binaryPathId, -1, isKernel);
default:
return -1;
}
}
-qint32 PerfSymbolTable::parseDwarf(Dwarf_Die *cudie, Dwarf_Die *subroutine, Dwarf_Addr bias, qint32 binaryId,
- qint32 binaryPathId, bool isKernel)
+qint32 PerfSymbolTable::parseDwarf(CuDieRangeMapping *cudie, SubProgramDie *subprogram, const QVector<Dwarf_Die> &inlined,
+ Dwarf_Addr bias, qint32 binaryId, qint32 binaryPathId, bool isKernel)
{
- Dwarf_Die *scopes = nullptr;
- const auto nscopes = dwarf_getscopes_die(subroutine, &scopes);
-
Dwarf_Files *files = nullptr;
- dwarf_getsrcfiles(cudie, &files, nullptr);
+ dwarf_getsrcfiles(cudie->cudie(), &files, nullptr);
qint32 parentLocationId = -1;
- for (int i = nscopes - 1; i >= 0; --i) {
- const auto scope = &scopes[i];
+ auto handleDie = [&](Dwarf_Die scope) {
Dwarf_Addr scopeAddr = bias;
Dwarf_Addr entry = 0;
- if (dwarf_entrypc(scope, &entry) == 0 && entry != 0)
+ if (dwarf_entrypc(&scope, &entry) == 0 && entry != 0)
scopeAddr += entry;
- auto locationId = parseDie(scope, binaryId, binaryPathId, isKernel, files, scopeAddr, parentLocationId);
+ auto locationId = parseDie(cudie, &scope, binaryId, binaryPathId, isKernel, files, scopeAddr, parentLocationId);
if (locationId != -1)
parentLocationId = locationId;
- }
+ };
- eu_compat_free(scopes);
+ handleDie(*subprogram->die());
+ std::for_each(inlined.begin(), inlined.end(), handleDie);
return parentLocationId;
}
@@ -672,60 +549,6 @@ PerfElfMap::ElfInfo PerfSymbolTable::findElf(quint64 ip) const
return m_elfs.findElf(ip);
}
-class CuDieRanges
-{
-public:
- struct CuDieRange
- {
- Dwarf_Die *cuDie;
- Dwarf_Addr bias;
- Dwarf_Addr low;
- Dwarf_Addr high;
-
- bool contains(Dwarf_Addr addr) const
- {
- return low <= addr && addr < high;
- }
- };
-
- CuDieRanges(Dwfl_Module *mod = nullptr)
- {
- if (!mod)
- return;
-
- Dwarf_Die *die = nullptr;
- Dwarf_Addr bias = 0;
- while ((die = dwfl_module_nextcu(mod, die, &bias))) {
- Dwarf_Addr low = 0;
- Dwarf_Addr high = 0;
- Dwarf_Addr base = 0;
- ptrdiff_t offset = 0;
- while ((offset = dwarf_ranges(die, offset, &base, &low, &high)) > 0) {
- ranges.push_back(CuDieRange{die, bias, low + bias, high + bias});
- }
- }
- }
-
- Dwarf_Die *findDie(Dwarf_Addr addr, Dwarf_Addr *bias) const
- {
- auto it = std::find_if(ranges.begin(), ranges.end(),
- [addr](const CuDieRange &range) {
- return range.contains(addr);
- });
- if (it == ranges.end())
- return nullptr;
-
- *bias = it->bias;
- return it->cuDie;
- }
-public:
- QVector<CuDieRange> ranges;
-};
-QT_BEGIN_NAMESPACE
-Q_DECLARE_TYPEINFO(CuDieRanges, Q_MOVABLE_TYPE);
-Q_DECLARE_TYPEINFO(CuDieRanges::CuDieRange, Q_MOVABLE_TYPE);
-QT_END_NAMESPACE
-
int symbolIndex(const Elf64_Rel &rel)
{
return ELF64_R_SYM(rel.r_info);
@@ -938,8 +761,8 @@ int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel,
} else {
GElf_Sym sym;
// For addrinfo we need the raw pointer into symtab, so we need to adjust ourselves.
- symname = dwfl_module_addrinfo(mod, addressLocation.address, &off, &sym, nullptr, nullptr,
- nullptr);
+ symname = demangle(dwfl_module_addrinfo(mod, addressLocation.address, &off, &sym,
+ nullptr, nullptr, nullptr));
if (off != addressLocation.address)
addressCache->cacheSymbol(elf, addressLocation.address - off, sym.st_size, symname);
}
@@ -951,20 +774,14 @@ int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel,
Dwarf_Addr bias = 0;
functionLocation.address -= off; // in case we don't find anything better
- auto die = dwfl_module_addrdie(mod, addressLocation.address, &bias);
- if (!die) {
- // broken DWARF emitter by clang, e.g. no aranges
- // cf.: https://sourceware.org/ml/elfutils-devel/2017-q2/msg00180.html
- // build a custom lookup table and query that one
- if (!m_cuDieRanges.contains(mod)) {
- m_cuDieRanges[mod] = CuDieRanges(mod);
- }
- const auto& maps = m_cuDieRanges[mod];
- die = maps.findDie(addressLocation.address, &bias);
- }
+ if (!m_cuDieRanges.contains(mod))
+ m_cuDieRanges[mod] = PerfDwarfDieCache(mod);
- if (die) {
- auto srcloc = dwarf_getsrc_die(die, addressLocation.address - bias);
+ auto *cudie = m_cuDieRanges[mod].findCuDie(addressLocation.address);
+ if (cudie) {
+ bias = cudie->bias();
+ const auto offset = addressLocation.address - bias;
+ auto srcloc = dwarf_getsrc_die(cudie->cudie(), offset);
if (srcloc) {
const char* srcfile = dwarf_linesrc(srcloc, nullptr, nullptr);
if (srcfile) {
@@ -974,42 +791,39 @@ int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel,
dwarf_linecol(srcloc, &addressLocation.column);
}
}
- }
- Dwarf_Die *subroutine = nullptr;
- Dwarf_Die *scopes = nullptr;
- int nscopes = dwarf_getscopes(die, addressLocation.address - bias, &scopes);
- for (int i = 0; i < nscopes; ++i) {
- Dwarf_Die *scope = &scopes[i];
- const int tag = dwarf_tag(scope);
- if (tag == DW_TAG_subprogram || tag == DW_TAG_inlined_subroutine) {
- Dwarf_Addr entry = 0;
- dwarf_entrypc(scope, &entry);
- symname = dieName(scope); // use name of inlined function as symbol
- functionLocation.address = entry + bias;
- functionLocation.file = m_unwind->resolveString(dwarf_decl_file(scope));
- dwarf_decl_line(scope, &functionLocation.line);
- dwarf_decl_column(scope, &functionLocation.column);
-
- subroutine = scope;
- break;
+ auto *subprogram = cudie->findSubprogramDie(offset);
+ if (subprogram) {
+ const auto scopes = findInlineScopes(subprogram->die(), offset);
+
+ // setup function location, i.e. entry point of the (inlined) frame
+ [&](Dwarf_Die die) {
+ Dwarf_Addr entry = 0;
+ dwarf_entrypc(&die, &entry);
+ symname = cudie->dieName(&die); // use name of inlined function as symbol
+ functionLocation.address = entry + bias;
+ functionLocation.file = m_unwind->resolveString(dwarf_decl_file(&die));
+ dwarf_decl_line(&die, &functionLocation.line);
+ dwarf_decl_column(&die, &functionLocation.column);
+ }(scopes.isEmpty() ? *subprogram->die() : scopes.last());
+
+ // check if the inline chain was cached already
+ addressLocation.parentLocationId = m_unwind->lookupLocation(functionLocation);
+ // otherwise resolve the inline chain if possible
+ if (!scopes.isEmpty() && !m_unwind->hasSymbol(addressLocation.parentLocationId)) {
+ functionLocation.parentLocationId = parseDwarf(cudie, subprogram, scopes, bias,
+ binaryId, binaryPathId, isKernel);
+ }
}
}
- // check if the inline chain was cached already
- addressLocation.parentLocationId = m_unwind->lookupLocation(functionLocation);
- // otherwise resolve the inline chain if possible
- if (subroutine && !m_unwind->hasSymbol(addressLocation.parentLocationId))
- functionLocation.parentLocationId = parseDwarf(die, subroutine, bias, binaryId, binaryPathId, isKernel);
- // then resolve and cache the inline chain
+ // resolve and cache the inline chain
if (addressLocation.parentLocationId == -1)
addressLocation.parentLocationId = m_unwind->resolveLocation(functionLocation);
-
- eu_compat_free(scopes);
}
if (!m_unwind->hasSymbol(addressLocation.parentLocationId)) {
// no sufficient debug information. Use what we already know
- qint32 symId = m_unwind->resolveString(demangle(symname));
+ qint32 symId = m_unwind->resolveString(symname);
m_unwind->resolveSymbol(addressLocation.parentLocationId,
PerfUnwind::Symbol(symId, binaryId, binaryPathId, isKernel));
}
diff --git a/app/perfsymboltable.h b/app/perfsymboltable.h
index 6b2c12b..526834b 100644
--- a/app/perfsymboltable.h
+++ b/app/perfsymboltable.h
@@ -34,7 +34,9 @@
#include <QObject>
-class CuDieRanges;
+class PerfDwarfDieCache;
+class SubProgramDie;
+class CuDieRangeMapping;
class PerfSymbolTable
{
@@ -115,17 +117,17 @@ private:
PerfElfMap m_elfs;
PerfAddressCache::OffsetAddressCache m_invalidAddressCache;
- QHash<Dwfl_Module*, CuDieRanges> m_cuDieRanges;
+ QHash<Dwfl_Module*, PerfDwarfDieCache> m_cuDieRanges;
Dwfl_Callbacks *m_callbacks;
qint32 m_pid;
QByteArray symbolFromPerfMap(quint64 ip, GElf_Off *offset) const;
- int parseDie(Dwarf_Die *top, qint32 binaryId, qint32 binaryPathId, bool isKernel,
+ int parseDie(CuDieRangeMapping *cudie, Dwarf_Die *top, qint32 binaryId, qint32 binaryPathId, bool isKernel,
Dwarf_Files *files, Dwarf_Addr entry, qint32 parentLocationId);
- int insertSubprogram(Dwarf_Die *top, Dwarf_Addr entry, qint32 binaryId, qint32 binaryPathId,
+ int insertSubprogram(CuDieRangeMapping *cudie, Dwarf_Die *top, Dwarf_Addr entry, qint32 binaryId, qint32 binaryPathId,
qint32 inlineParent, bool isKernel);
- qint32 parseDwarf(Dwarf_Die *cudie, Dwarf_Die *subroutine, Dwarf_Addr bias, qint32 binaryId,
- qint32 binaryPathId, bool isKernel);
+ qint32 parseDwarf(CuDieRangeMapping *cudie, SubProgramDie *subprogram, const QVector<Dwarf_Die> &inlined,
+ Dwarf_Addr bias, qint32 binaryId, qint32 binaryPathId, bool isKernel);
};
QT_BEGIN_NAMESPACE