summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Milian Wolff <milian.wolff@kdab.com> 2020-01-06 12:35:17 +0100
committer Milian Wolff <milian.wolff@kdab.com> 2020-01-09 19:24:09 +0000
commit 69e2662a4da64d8b614f5d65d9e1064d55690469 (patch)
tree 9b40d499884bf7f0eb785526fcce0487cb8c28e6
parent 807cccbb89d58da774c73e9bc3c1bddc6e8e653d (diff)
Introduce per-DSO cache for symbol lookup via dwfl_module_addrinfo
The symbol table isn't necessarily sorted, and thus repeated lookups in there can be expensive when a DSO has many entries in its symtab. For example, the librustc_driver from rustc 1.40.0 has about 202594 symbols. A single call to dwfl_module_addrinfo can take milliseconds on my laptop. Every time we get a sample at a so far unknown address, we have to find the corresponding symbol. So we called this function a lot, which can add up to a significant amount of time.

Now, we cache the symbol name and its offset and size information in a sorted list and try to look up the symbol there quickly. The impact of this patch on the overall time required to analyze a ~1GB perf.data file for a `cargo build` process (and its child processes) is huge:

before:
```
447.681,66 msec task-clock:u # 0,989 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
45.214 page-faults:u # 0,101 K/sec
1.272.289.956.854 cycles:u # 2,842 GHz
3.497.255.264.964 instructions:u # 2,75 insn per cycle
863.671.557.196 branches:u # 1929,209 M/sec
2.666.320.642 branch-misses:u # 0,31% of all branches
452,806895428 seconds time elapsed
441,996666000 seconds user
2,557237000 seconds sys
```

after:
```
63.770,08 msec task-clock:u # 0,995 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
35.102 page-faults:u # 0,550 K/sec
191.267.750.628 cycles:u # 2,999 GHz
501.316.536.714 instructions:u # 2,62 insn per cycle
122.234.405.333 branches:u # 1916,799 M/sec
443.671.470 branch-misses:u # 0,36% of all branches
64,063443896 seconds time elapsed
62,188041000 seconds user
1,136533000 seconds sys
```

That means we are now roughly 7x faster than before.

Fixes: https://github.com/KDAB/hotspot/issues/225
Change-Id: Ib7dbc800c9372044a847de68a8459dd7f7b0d3da
Reviewed-by: Ulf Hermann <ulf.hermann@qt.io>
-rw-r--r--app/perfaddresscache.cpp35
-rw-r--r--app/perfaddresscache.h24
-rw-r--r--app/perfsymboltable.cpp22
-rw-r--r--tests/auto/addresscache/tst_addresscache.cpp27
4 files changed, 102 insertions, 6 deletions
diff --git a/app/perfaddresscache.cpp b/app/perfaddresscache.cpp
index 2bf05c0..27b646c 100644
--- a/app/perfaddresscache.cpp
+++ b/app/perfaddresscache.cpp
@@ -47,3 +47,38 @@ void PerfAddressCache::cache(const PerfElfMap::ElfInfo& elf, quint64 addr,
else
(*invalidAddressCache)[addr] = entry;
}
+
+// Ordering of a cached symbol against a bare address, so that std::lower_bound
+// can search a SymbolCache directly for an address value.
+static bool operator<(const PerfAddressCache::SymbolCacheEntry &entry, quint64 relAddr)
+{
+    return relAddr > entry.offset;
+}
+
+/**
+ * Find the cached symbol of @p elf that contains @p addr.
+ *
+ * The address is first converted with relativeAddress() and then searched for
+ * in the per-DSO list, which cacheSymbol() keeps sorted by offset.
+ *
+ * @return the matching entry, or a default-constructed entry (for which
+ *         isValid() is false) when nothing cached covers the address.
+ */
+PerfAddressCache::SymbolCacheEntry PerfAddressCache::findSymbol(const PerfElfMap::ElfInfo& elf,
+                                                                quint64 addr) const
+{
+    Q_ASSERT(elf.isValid());
+    const auto &entries = m_symbolCache.value(elf.originalPath);
+    const auto target = relativeAddress(elf, addr);
+    const auto candidate = std::lower_bound(entries.begin(), entries.end(), target);
+
+    // The address is exactly the start of a cached symbol.
+    if (candidate != entries.end() && candidate->offset == target)
+        return *candidate;
+
+    // No cached symbol starts before the address, so nothing can contain it.
+    if (candidate == entries.begin())
+        return {};
+
+    // Otherwise only the closest symbol starting before the address can contain it.
+    const auto &preceding = *(candidate - 1);
+    const bool covers = preceding.offset <= target && preceding.offset + preceding.size > target;
+    return covers ? preceding : SymbolCacheEntry{};
+}
+
+/**
+ * Remember the symbol @p symname of @p size bytes that starts at @p startAddr
+ * inside @p elf, so later findSymbol() calls can resolve addresses within it.
+ *
+ * The start address is converted with relativeAddress() and the entry is
+ * placed into the per-DSO list such that the list stays sorted by offset.
+ * Caching the same offset again replaces the existing entry.
+ */
+void PerfAddressCache::cacheSymbol(const PerfElfMap::ElfInfo& elf, quint64 startAddr, quint64 size,
+                                   const QByteArray& symname)
+{
+    Q_ASSERT(elf.isValid());
+    auto &symbols = m_symbolCache[elf.originalPath];
+    const auto offset = relativeAddress(elf, startAddr);
+    auto it = std::lower_bound(symbols.begin(), symbols.end(), offset);
+    if (it != symbols.end() && it->offset == offset) {
+        // A zero-sized symbol is stored but never reported as valid by
+        // findSymbol(), so callers resolve and cache it again for every new
+        // address. Overwrite instead of inserting so the list does not fill
+        // up with duplicates (the newest data still wins, as before).
+        *it = {offset, size, symname};
+        return;
+    }
+    symbols.insert(it, {offset, size, symname});
+}
diff --git a/app/perfaddresscache.h b/app/perfaddresscache.h
index bfbb06e..e372e77 100644
--- a/app/perfaddresscache.h
+++ b/app/perfaddresscache.h
@@ -21,6 +21,8 @@
#define PERFADDRESSCACHE_H
#include <QHash>
+#include <QVector>
+
#include "perfelfmap.h"
class PerfAddressCache
@@ -38,12 +40,34 @@ public:
};
using OffsetAddressCache = QHash<quint64, AddressCacheEntry>;
+    /// A single cached symbol of a DSO: where it starts, how long it is and its name.
+    struct SymbolCacheEntry
+    {
+        quint64 offset; ///< start of the symbol, as computed by relativeAddress()
+        quint64 size; ///< length of the symbol in bytes; 0 marks an invalid entry
+        QByteArray symname; ///< symbol name as passed to cacheSymbol()
+
+        // Intentionally not explicit: findSymbol() returns `{}` for "not found".
+        SymbolCacheEntry(quint64 offset = 0, quint64 size = 0, const QByteArray &symname = {})
+            : offset(offset), size(size), symname(symname)
+        {
+        }
+
+        /// An entry counts as valid only when it has a non-zero size.
+        bool isValid() const { return 0 != size; }
+    };
+ using SymbolCache = QVector<SymbolCacheEntry>;
+
AddressCacheEntry find(const PerfElfMap::ElfInfo& elf, quint64 addr,
OffsetAddressCache *invalidAddressCache) const;
void cache(const PerfElfMap::ElfInfo& elf, quint64 addr,
const AddressCacheEntry& entry, OffsetAddressCache *invalidAddressCache);
+
+ SymbolCacheEntry findSymbol(const PerfElfMap::ElfInfo &elf, quint64 addr) const;
+ void cacheSymbol(const PerfElfMap::ElfInfo &elf, quint64 startAddr, quint64 size,
+ const QByteArray &symname);
private:
QHash<QByteArray, OffsetAddressCache> m_cache;
+ QHash<QByteArray, SymbolCache> m_symbolCache;
};
+Q_DECLARE_TYPEINFO(PerfAddressCache::SymbolCacheEntry, Q_MOVABLE_TYPE);
#endif
diff --git a/app/perfsymboltable.cpp b/app/perfsymboltable.cpp
index f260fd3..8383b47 100644
--- a/app/perfsymboltable.cpp
+++ b/app/perfsymboltable.cpp
@@ -894,8 +894,10 @@ static QByteArray fakeSymbolFromSection(Dwfl_Module *mod, Dwarf_Addr addr)
int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel,
bool *isInterworking)
{
+ auto addressCache = m_unwind->addressCache();
+
const auto& elf = findElf(ip);
- auto cached = m_unwind->addressCache()->find(elf, ip, &m_invalidAddressCache);
+ auto cached = addressCache->find(elf, ip, &m_invalidAddressCache);
if (cached.isValid()) {
*isInterworking = cached.isInterworking;
return cached.locationId;
@@ -918,13 +920,21 @@ int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel,
PerfUnwind::Location functionLocation(addressLocation);
QByteArray symname;
- GElf_Sym sym;
GElf_Off off = 0;
if (mod) {
- // For addrinfo we need the raw pointer into symtab, so we need to adjust ourselves.
- symname = dwfl_module_addrinfo(mod, addressLocation.address, &off, &sym, nullptr, nullptr,
- nullptr);
+ auto cachedAddrInfo = addressCache->findSymbol(elf, addressLocation.address);
+ if (cachedAddrInfo.isValid()) {
+ off = addressLocation.address - elf.addr - cachedAddrInfo.offset;
+ symname = cachedAddrInfo.symname;
+ } else {
+ GElf_Sym sym;
+ // For addrinfo we need the raw pointer into symtab, so we need to adjust ourselves.
+ symname = dwfl_module_addrinfo(mod, addressLocation.address, &off, &sym, nullptr, nullptr,
+ nullptr);
+ if (off != addressLocation.address)
+ addressCache->cacheSymbol(elf, addressLocation.address - off, sym.st_size, symname);
+ }
if (off == addressLocation.address) {// no symbol found
symname = fakeSymbolFromSection(mod, addressLocation.address);
@@ -1023,7 +1033,7 @@ int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel,
int locationId = m_unwind->resolveLocation(addressLocation);
*isInterworking = (symname == "$a" || symname == "$t");
- m_unwind->addressCache()->cache(elf, ip, {locationId, *isInterworking}, &m_invalidAddressCache);
+ addressCache->cache(elf, ip, {locationId, *isInterworking}, &m_invalidAddressCache);
return locationId;
}
diff --git a/tests/auto/addresscache/tst_addresscache.cpp b/tests/auto/addresscache/tst_addresscache.cpp
index 604274e..c17df81 100644
--- a/tests/auto/addresscache/tst_addresscache.cpp
+++ b/tests/auto/addresscache/tst_addresscache.cpp
@@ -61,6 +61,33 @@ private slots:
PerfAddressCache::OffsetAddressCache invalidAddressCache;
QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x123, &invalidAddressCache).locationId, -1);
}
+
+    /// Checks that a cached symbol is found for every address inside its range,
+    /// is not found right past its end, and does not leak into another DSO.
+    void testSymbolCache()
+    {
+        PerfElfMap::ElfInfo libA{{}, 0x100, 100, 0,
+                                 QByteArrayLiteral("libfoo_a.so"),
+                                 QByteArrayLiteral("/usr/lib/libfoo_a.so")};
+        PerfElfMap::ElfInfo libB{{}, 0x200, 100, 0,
+                                 QByteArrayLiteral("libfoo_b.so"),
+                                 QByteArrayLiteral("/usr/lib/libfoo_b.so")};
+
+        PerfAddressCache cache;
+
+        // Nothing has been cached yet, so every lookup must miss.
+        QVERIFY(!cache.findSymbol(libA, 0x100).isValid());
+        QVERIFY(!cache.findSymbol(libB, 0x100).isValid());
+
+        cache.cacheSymbol(libA, 0x100, 10, "Foo");
+
+        // First and last address covered by the 10 byte symbol.
+        const int insideFoo[] = {0x100, 0x100 + 9};
+        for (const auto addr : insideFoo) {
+            const auto hit = cache.findSymbol(libA, addr);
+            QVERIFY(hit.isValid());
+            QCOMPARE(hit.offset, 0);
+            QCOMPARE(hit.size, 10);
+            QCOMPARE(hit.symname, "Foo");
+        }
+
+        // One past the end of the symbol is a miss again.
+        QVERIFY(!cache.findSymbol(libA, 0x100 + 10).isValid());
+
+        // The other DSO has its own, still empty, symbol list.
+        QVERIFY(!cache.findSymbol(libB, 0x100).isValid());
+        QVERIFY(!cache.findSymbol(libB, 0x100 + 9).isValid());
+    }
};
QTEST_GUILESS_MAIN(TestAddressCache)