From 69e2662a4da64d8b614f5d65d9e1064d55690469 Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Mon, 6 Jan 2020 12:35:17 +0100 Subject: Introduce per-DSO cache for symbol lookup via dwfl_module_addrinfo The symbol table isn't necessarily sorted, and thus repeated lookups in there can be expensive when a DSO has many entries in its symtab. For example, the librustc_driver from rustc 1.40.0 has about 202594 symbols. A single call to dwfl_module_addrinfo can take milliseconds on my laptop. Every time we get a sample at a so far unknown address, we have to find the corresponding symbol. So we called this function a lot, which can add up to a significant amount of time. Now, we cache the symbol name and its offset and size information in a sorted list and try to look up the symbol there quickly. The impact of this patch on the overall time required to analyze a ~1GB perf.data file for a `cargo build` process (and its child processes) is huge: before: ``` 447.681,66 msec task-clock:u # 0,989 CPUs utilized 0 context-switches:u # 0,000 K/sec 0 cpu-migrations:u # 0,000 K/sec 45.214 page-faults:u # 0,101 K/sec 1.272.289.956.854 cycles:u # 2,842 GHz 3.497.255.264.964 instructions:u # 2,75 insn per cycle 863.671.557.196 branches:u # 1929,209 M/sec 2.666.320.642 branch-misses:u # 0,31% of all branches 452,806895428 seconds time elapsed 441,996666000 seconds user 2,557237000 seconds sys ``` after: ``` 63.770,08 msec task-clock:u # 0,995 CPUs utilized 0 context-switches:u # 0,000 K/sec 0 cpu-migrations:u # 0,000 K/sec 35.102 page-faults:u # 0,550 K/sec 191.267.750.628 cycles:u # 2,999 GHz 501.316.536.714 instructions:u # 2,62 insn per cycle 122.234.405.333 branches:u # 1916,799 M/sec 443.671.470 branch-misses:u # 0,36% of all branches 64,063443896 seconds time elapsed 62,188041000 seconds user 1,136533000 seconds sys ``` That means we are now roughly 7x faster than before. 
Fixes: https://github.com/KDAB/hotspot/issues/225 Change-Id: Ib7dbc800c9372044a847de68a8459dd7f7b0d3da Reviewed-by: Ulf Hermann --- app/perfaddresscache.cpp | 35 ++++++++++++++++++++++++++++ app/perfaddresscache.h | 24 +++++++++++++++++++ app/perfsymboltable.cpp | 22 ++++++++++++----- tests/auto/addresscache/tst_addresscache.cpp | 27 +++++++++++++++++++++ 4 files changed, 102 insertions(+), 6 deletions(-) diff --git a/app/perfaddresscache.cpp b/app/perfaddresscache.cpp index 2bf05c0..27b646c 100644 --- a/app/perfaddresscache.cpp +++ b/app/perfaddresscache.cpp @@ -47,3 +47,38 @@ void PerfAddressCache::cache(const PerfElfMap::ElfInfo& elf, quint64 addr, else (*invalidAddressCache)[addr] = entry; } + +static bool operator<(const PerfAddressCache::SymbolCacheEntry &lhs, quint64 addr) +{ + return lhs.offset < addr; +} + +PerfAddressCache::SymbolCacheEntry PerfAddressCache::findSymbol(const PerfElfMap::ElfInfo& elf, + quint64 addr) const +{ + Q_ASSERT(elf.isValid()); + const auto &symbols = m_symbolCache.value(elf.originalPath); + const auto relAddr = relativeAddress(elf, addr); + auto it = std::lower_bound(symbols.begin(), symbols.end(), relAddr); + + if (it != symbols.end() && it->offset == relAddr) + return *it; + if (it == symbols.begin()) + return {}; + + --it; + + if (it->offset <= relAddr && it->offset + it->size > relAddr) + return *it; + return {}; +} + +void PerfAddressCache::cacheSymbol(const PerfElfMap::ElfInfo& elf, quint64 startAddr, quint64 size, + const QByteArray& symname) +{ + Q_ASSERT(elf.isValid()); + auto &symbols = m_symbolCache[elf.originalPath]; + const auto offset = relativeAddress(elf, startAddr); + auto it = std::lower_bound(symbols.begin(), symbols.end(), offset); + symbols.insert(it, {offset, size, symname}); +} diff --git a/app/perfaddresscache.h b/app/perfaddresscache.h index bfbb06e..e372e77 100644 --- a/app/perfaddresscache.h +++ b/app/perfaddresscache.h @@ -21,6 +21,8 @@ #define PERFADDRESSCACHE_H #include +#include + #include 
"perfelfmap.h" class PerfAddressCache @@ -38,12 +40,34 @@ public: }; using OffsetAddressCache = QHash; + struct SymbolCacheEntry + { + SymbolCacheEntry(quint64 offset = 0, quint64 size = 0, const QByteArray &symname = {}) + : offset(offset) + , size(size) + , symname(symname) + {} + + bool isValid() const { return size != 0; } + + quint64 offset; + quint64 size; + QByteArray symname; + }; + using SymbolCache = QVector; + AddressCacheEntry find(const PerfElfMap::ElfInfo& elf, quint64 addr, OffsetAddressCache *invalidAddressCache) const; void cache(const PerfElfMap::ElfInfo& elf, quint64 addr, const AddressCacheEntry& entry, OffsetAddressCache *invalidAddressCache); + + SymbolCacheEntry findSymbol(const PerfElfMap::ElfInfo &elf, quint64 addr) const; + void cacheSymbol(const PerfElfMap::ElfInfo &elf, quint64 startAddr, quint64 size, + const QByteArray &symname); private: QHash m_cache; + QHash m_symbolCache; }; +Q_DECLARE_TYPEINFO(PerfAddressCache::SymbolCacheEntry, Q_MOVABLE_TYPE); #endif diff --git a/app/perfsymboltable.cpp b/app/perfsymboltable.cpp index f260fd3..8383b47 100644 --- a/app/perfsymboltable.cpp +++ b/app/perfsymboltable.cpp @@ -894,8 +894,10 @@ static QByteArray fakeSymbolFromSection(Dwfl_Module *mod, Dwarf_Addr addr) int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel, bool *isInterworking) { + auto addressCache = m_unwind->addressCache(); + const auto& elf = findElf(ip); - auto cached = m_unwind->addressCache()->find(elf, ip, &m_invalidAddressCache); + auto cached = addressCache->find(elf, ip, &m_invalidAddressCache); if (cached.isValid()) { *isInterworking = cached.isInterworking; return cached.locationId; @@ -918,13 +920,21 @@ int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel, PerfUnwind::Location functionLocation(addressLocation); QByteArray symname; - GElf_Sym sym; GElf_Off off = 0; if (mod) { - // For addrinfo we need the raw pointer into symtab, so we need to adjust ourselves. 
- symname = dwfl_module_addrinfo(mod, addressLocation.address, &off, &sym, nullptr, nullptr, - nullptr); + auto cachedAddrInfo = addressCache->findSymbol(elf, addressLocation.address); + if (cachedAddrInfo.isValid()) { + off = addressLocation.address - elf.addr - cachedAddrInfo.offset; + symname = cachedAddrInfo.symname; + } else { + GElf_Sym sym; + // For addrinfo we need the raw pointer into symtab, so we need to adjust ourselves. + symname = dwfl_module_addrinfo(mod, addressLocation.address, &off, &sym, nullptr, nullptr, + nullptr); + if (off != addressLocation.address) + addressCache->cacheSymbol(elf, addressLocation.address - off, sym.st_size, symname); + } if (off == addressLocation.address) {// no symbol found symname = fakeSymbolFromSection(mod, addressLocation.address); @@ -1023,7 +1033,7 @@ int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel, int locationId = m_unwind->resolveLocation(addressLocation); *isInterworking = (symname == "$a" || symname == "$t"); - m_unwind->addressCache()->cache(elf, ip, {locationId, *isInterworking}, &m_invalidAddressCache); + addressCache->cache(elf, ip, {locationId, *isInterworking}, &m_invalidAddressCache); return locationId; } diff --git a/tests/auto/addresscache/tst_addresscache.cpp b/tests/auto/addresscache/tst_addresscache.cpp index 604274e..c17df81 100644 --- a/tests/auto/addresscache/tst_addresscache.cpp +++ b/tests/auto/addresscache/tst_addresscache.cpp @@ -61,6 +61,33 @@ private slots: PerfAddressCache::OffsetAddressCache invalidAddressCache; QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x123, &invalidAddressCache).locationId, -1); } + + void testSymbolCache() + { + PerfElfMap::ElfInfo info_a{{}, 0x100, 100, 0, + QByteArrayLiteral("libfoo_a.so"), + QByteArrayLiteral("/usr/lib/libfoo_a.so")}; + PerfElfMap::ElfInfo info_b{{}, 0x200, 100, 0, + QByteArrayLiteral("libfoo_b.so"), + QByteArrayLiteral("/usr/lib/libfoo_b.so")}; + + PerfAddressCache cache; + + QVERIFY(!cache.findSymbol(info_a, 0x100).isValid()); + 
QVERIFY(!cache.findSymbol(info_b, 0x100).isValid()); + + cache.cacheSymbol(info_a, 0x100, 10, "Foo"); + for (auto addr : {0x100, 0x100 + 9}) { + const auto cached = cache.findSymbol(info_a, addr); + QVERIFY(cached.isValid()); + QCOMPARE(cached.offset, 0); + QCOMPARE(cached.size, 10); + QCOMPARE(cached.symname, "Foo"); + } + QVERIFY(!cache.findSymbol(info_a, 0x100 + 10).isValid()); + QVERIFY(!cache.findSymbol(info_b, 0x100).isValid()); + QVERIFY(!cache.findSymbol(info_b, 0x100 + 9).isValid()); + } }; QTEST_GUILESS_MAIN(TestAddressCache) -- cgit v1.2.3