diff options
author | Milian Wolff <milian.wolff@kdab.com> | 2020-01-06 09:24:34 +0100 |
---|---|---|
committer | Milian Wolff <milian.wolff@kdab.com> | 2020-01-09 19:24:03 +0000 |
commit | 807cccbb89d58da774c73e9bc3c1bddc6e8e653d (patch) | |
tree | 48051cc5b66a22f7c58ffd66252e4dee517c78ea | |
parent | 37dce9049204ea8ed420686cb407aad0be3d7950 (diff) |
Share per-DSO address cache across processes
When we profile a multi-process ensemble, it will often happen that
we encounter samples at the same relative address of a DSO in more
than one process. In such cases, we can leverage a central cache to
store the information, instead of recomputing the same data for every
process.
As an example, I wrote a shell script that runs the same process four
times in parallel. When I parse the resulting perf.data file, the perf
stat results are as follows:
before:
```
Performance counter stats for '/home/milian/projects/compiled/other/lib/libexec/hotspot-perfparser --input ./perf.data --output /dev/null':
4.240,50 msec task-clock:u # 0,956 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
17.389 page-faults:u # 0,004 M/sec
11.195.771.907 cycles:u # 2,640 GHz
26.585.168.652 instructions:u # 2,37 insn per cycle
6.234.491.027 branches:u # 1470,227 M/sec
35.149.387 branch-misses:u # 0,56% of all branches
4,435152034 seconds time elapsed
3,732758000 seconds user
0,490148000 seconds sys
```
after:
```
Performance counter stats for '/home/milian/projects/compiled/other/lib/libexec/hotspot-perfparser --input ./perf.data --output /dev/null':
4.160,90 msec task-clock:u # 0,979 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
15.476 page-faults:u # 0,004 M/sec
10.635.798.451 cycles:u # 2,556 GHz
16.616.035.720 instructions:u # 1,56 insn per cycle
3.838.148.777 branches:u # 922,433 M/sec
24.902.558 branch-misses:u # 0,65% of all branches
4,249408917 seconds time elapsed
3,612442000 seconds user
0,533933000 seconds sys
```
Note that the overall elapsed time doesn't change that much here,
but the number of instructions required is massively reduced. I bet
there are other situations where this patch will bring a more tangible
improvement to the overall time requirement.
Change-Id: I4531ec648af40dd44b9e4290fab7bbd2a89609da
Reviewed-by: Ulf Hermann <ulf.hermann@qt.io>
-rw-r--r-- | app/perfaddresscache.cpp | 26 | ||||
-rw-r--r-- | app/perfaddresscache.h | 9 | ||||
-rw-r--r-- | app/perfsymboltable.cpp | 6 | ||||
-rw-r--r-- | app/perfsymboltable.h | 2 | ||||
-rw-r--r-- | app/perfunwind.h | 3 | ||||
-rw-r--r-- | tests/auto/addresscache/tst_addresscache.cpp | 17 |
6 files changed, 36 insertions, 27 deletions
diff --git a/app/perfaddresscache.cpp b/app/perfaddresscache.cpp index 7f440b1..2bf05c0 100644 --- a/app/perfaddresscache.cpp +++ b/app/perfaddresscache.cpp @@ -22,28 +22,28 @@ namespace { quint64 relativeAddress(const PerfElfMap::ElfInfo& elf, quint64 addr) { - if (!elf.isValid()) - return addr; - + Q_ASSERT(elf.isValid()); Q_ASSERT(elf.addr <= addr); Q_ASSERT((elf.addr + elf.length) > addr); return addr - elf.addr; } } -PerfAddressCache::AddressCacheEntry PerfAddressCache::find(const PerfElfMap::ElfInfo& elf, - quint64 addr) const +PerfAddressCache::AddressCacheEntry PerfAddressCache::find(const PerfElfMap::ElfInfo& elf, quint64 addr, + OffsetAddressCache *invalidAddressCache) const { - return m_cache.value(elf.originalPath).value(relativeAddress(elf, addr)); + if (elf.isValid()) + return m_cache.value(elf.originalPath).value(relativeAddress(elf, addr)); + else + return invalidAddressCache->value(addr); } void PerfAddressCache::cache(const PerfElfMap::ElfInfo& elf, quint64 addr, - const PerfAddressCache::AddressCacheEntry& entry) -{ - m_cache[elf.originalPath][relativeAddress(elf, addr)] = entry; -} - -void PerfAddressCache::clearInvalid() + const PerfAddressCache::AddressCacheEntry& entry, + OffsetAddressCache *invalidAddressCache) { - m_cache[{}].clear(); + if (elf.isValid()) + m_cache[elf.originalPath][relativeAddress(elf, addr)] = entry; + else + (*invalidAddressCache)[addr] = entry; } diff --git a/app/perfaddresscache.h b/app/perfaddresscache.h index 92751f9..bfbb06e 100644 --- a/app/perfaddresscache.h +++ b/app/perfaddresscache.h @@ -36,13 +36,14 @@ public: int locationId; bool isInterworking; }; + using OffsetAddressCache = QHash<quint64, AddressCacheEntry>; - AddressCacheEntry find(const PerfElfMap::ElfInfo& elf, quint64 addr) const; + AddressCacheEntry find(const PerfElfMap::ElfInfo& elf, quint64 addr, + OffsetAddressCache *invalidAddressCache) const; void cache(const PerfElfMap::ElfInfo& elf, quint64 addr, - const AddressCacheEntry& entry); - void 
clearInvalid(); + const AddressCacheEntry& entry, OffsetAddressCache *invalidAddressCache); private: - QHash<QByteArray, QHash<quint64, AddressCacheEntry>> m_cache; + QHash<QByteArray, OffsetAddressCache> m_cache; }; #endif diff --git a/app/perfsymboltable.cpp b/app/perfsymboltable.cpp index 2482dd4..f260fd3 100644 --- a/app/perfsymboltable.cpp +++ b/app/perfsymboltable.cpp @@ -895,7 +895,7 @@ int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel, bool *isInterworking) { const auto& elf = findElf(ip); - auto cached = m_addressCache.find(elf, ip); + auto cached = m_unwind->addressCache()->find(elf, ip, &m_invalidAddressCache); if (cached.isValid()) { *isInterworking = cached.isInterworking; return cached.locationId; @@ -1023,7 +1023,7 @@ int PerfSymbolTable::lookupFrame(Dwarf_Addr ip, bool isKernel, int locationId = m_unwind->resolveLocation(addressLocation); *isInterworking = (symname == "$a" || symname == "$t"); - m_addressCache.cache(elf, ip, {locationId, *isInterworking}); + m_unwind->addressCache()->cache(elf, ip, {locationId, *isInterworking}, &m_invalidAddressCache); return locationId; } @@ -1095,7 +1095,7 @@ Dwfl *PerfSymbolTable::attachDwfl(void *arg) void PerfSymbolTable::clearCache() { - m_addressCache.clearInvalid(); + m_invalidAddressCache.clear(); m_cuDieRanges.clear(); m_perfMap.clear(); if (m_perfMapFile.isOpen()) diff --git a/app/perfsymboltable.h b/app/perfsymboltable.h index 9687763..d41b683 100644 --- a/app/perfsymboltable.h +++ b/app/perfsymboltable.h @@ -110,7 +110,7 @@ private: ElfAndFile m_firstElf; PerfElfMap m_elfs; - PerfAddressCache m_addressCache; + PerfAddressCache::OffsetAddressCache m_invalidAddressCache; QHash<Dwfl_Module*, CuDieRanges> m_cuDieRanges; Dwfl_Callbacks *m_callbacks; qint32 m_pid; diff --git a/app/perfunwind.h b/app/perfunwind.h index a400c80..cb4b9ed 100644 --- a/app/perfunwind.h +++ b/app/perfunwind.h @@ -24,6 +24,7 @@ #include "perfkallsyms.h" #include "perfregisterinfo.h" #include "perftracingdata.h" 
+#include "perfaddresscache.h" #include <libdwfl.h> @@ -208,6 +209,7 @@ public: void resolveSymbol(int locationId, const Symbol &symbol); PerfKallsymEntry findKallsymEntry(quint64 address); + PerfAddressCache *addressCache() { return &m_addressCache; } enum ErrorCode { TimeOrderViolation = 1, @@ -288,6 +290,7 @@ private: QList<TaskEvent> m_taskEventsBuffer; QHash<qint32, PerfSymbolTable *> m_symbolTables; PerfKallsyms m_kallsyms; + PerfAddressCache m_addressCache; PerfTracingData m_tracingData; QHash<QByteArray, qint32> m_strings; diff --git a/tests/auto/addresscache/tst_addresscache.cpp b/tests/auto/addresscache/tst_addresscache.cpp index 2086410..604274e 100644 --- a/tests/auto/addresscache/tst_addresscache.cpp +++ b/tests/auto/addresscache/tst_addresscache.cpp @@ -37,24 +37,29 @@ private slots: info_b.addr = 0x200; PerfAddressCache cache; + PerfAddressCache::OffsetAddressCache invalidAddressCache; PerfAddressCache::AddressCacheEntry entry{42, true}; - cache.cache(info_a, 0x110, entry); - QCOMPARE(cache.find(info_a, 0x110).locationId, entry.locationId); - QCOMPARE(cache.find(info_b, 0x210).locationId, entry.locationId); + cache.cache(info_a, 0x110, entry, &invalidAddressCache); + QCOMPARE(cache.find(info_a, 0x110, &invalidAddressCache).locationId, entry.locationId); + QCOMPARE(cache.find(info_b, 0x210, &invalidAddressCache).locationId, entry.locationId); } void testInvalid() { PerfAddressCache cache; + PerfAddressCache::OffsetAddressCache invalidAddressCache_a; + PerfAddressCache::OffsetAddressCache invalidAddressCache_b; PerfAddressCache::AddressCacheEntry entry{42, true}; - cache.cache(PerfElfMap::ElfInfo{}, 0x110, entry); - QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x110).locationId, entry.locationId); + cache.cache(PerfElfMap::ElfInfo{}, 0x110, entry, &invalidAddressCache_a); + QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x110, &invalidAddressCache_a).locationId, entry.locationId); + QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x110, 
&invalidAddressCache_b).locationId, -1); } void testEmpty() { PerfAddressCache cache; - QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x123).locationId, -1); + PerfAddressCache::OffsetAddressCache invalidAddressCache; + QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x123, &invalidAddressCache).locationId, -1); } }; |