summaryrefslogtreecommitdiffstats
path: root/tests/auto/addresscache/tst_addresscache.cpp
diff options
context:
space:
mode:
authorMilian Wolff <milian.wolff@kdab.com>2020-01-06 12:35:17 +0100
committerMilian Wolff <milian.wolff@kdab.com>2020-01-09 19:24:09 +0000
commit69e2662a4da64d8b614f5d65d9e1064d55690469 (patch)
tree9b40d499884bf7f0eb785526fcce0487cb8c28e6 /tests/auto/addresscache/tst_addresscache.cpp
parent807cccbb89d58da774c73e9bc3c1bddc6e8e653d (diff)
Introduce per-DSO cache for symbol lookup via dwfl_module_addrinfo
The symbol table isn't necessarily sorted, and thus repeated lookups in there can be expensive when a DSO has many entries in its symtab. For example, the librustc_driver from rustc 1.40.0 has about 202594 symbols. A single call to dwfl_module_addrinfo can take milliseconds on my laptop. Every time we get a sample at a so far unknown address, we have to find the corresponding symbol. So we called this function a lot, which can add up to a significant amount of time. Now, we cache the symbol name and its offset and size information in a sorted list and try to look up the symbol there quickly. The impact of this patch on the overall time required to analyze a ~1GB perf.data file for a `cargo build` process (and its child processes) is huge: before: ``` 447.681,66 msec task-clock:u # 0,989 CPUs utilized 0 context-switches:u # 0,000 K/sec 0 cpu-migrations:u # 0,000 K/sec 45.214 page-faults:u # 0,101 K/sec 1.272.289.956.854 cycles:u # 2,842 GHz 3.497.255.264.964 instructions:u # 2,75 insn per cycle 863.671.557.196 branches:u # 1929,209 M/sec 2.666.320.642 branch-misses:u # 0,31% of all branches 452,806895428 seconds time elapsed 441,996666000 seconds user 2,557237000 seconds sys ``` after: ``` 63.770,08 msec task-clock:u # 0,995 CPUs utilized 0 context-switches:u # 0,000 K/sec 0 cpu-migrations:u # 0,000 K/sec 35.102 page-faults:u # 0,550 K/sec 191.267.750.628 cycles:u # 2,999 GHz 501.316.536.714 instructions:u # 2,62 insn per cycle 122.234.405.333 branches:u # 1916,799 M/sec 443.671.470 branch-misses:u # 0,36% of all branches 64,063443896 seconds time elapsed 62,188041000 seconds user 1,136533000 seconds sys ``` That means we are now roughly 7x faster than before. Fixes: https://github.com/KDAB/hotspot/issues/225 Change-Id: Ib7dbc800c9372044a847de68a8459dd7f7b0d3da Reviewed-by: Ulf Hermann <ulf.hermann@qt.io>
Diffstat (limited to 'tests/auto/addresscache/tst_addresscache.cpp')
-rw-r--r--tests/auto/addresscache/tst_addresscache.cpp27
1 files changed, 27 insertions, 0 deletions
diff --git a/tests/auto/addresscache/tst_addresscache.cpp b/tests/auto/addresscache/tst_addresscache.cpp
index 604274e..c17df81 100644
--- a/tests/auto/addresscache/tst_addresscache.cpp
+++ b/tests/auto/addresscache/tst_addresscache.cpp
@@ -61,6 +61,33 @@ private slots:
PerfAddressCache::OffsetAddressCache invalidAddressCache;
QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x123, &invalidAddressCache).locationId, -1);
}
+
+ void testSymbolCache()
+ {
+ PerfElfMap::ElfInfo info_a{{}, 0x100, 100, 0,
+ QByteArrayLiteral("libfoo_a.so"),
+ QByteArrayLiteral("/usr/lib/libfoo_a.so")};
+ PerfElfMap::ElfInfo info_b{{}, 0x200, 100, 0,
+ QByteArrayLiteral("libfoo_b.so"),
+ QByteArrayLiteral("/usr/lib/libfoo_b.so")};
+
+ PerfAddressCache cache;
+
+ QVERIFY(!cache.findSymbol(info_a, 0x100).isValid());
+ QVERIFY(!cache.findSymbol(info_b, 0x100).isValid());
+
+ cache.cacheSymbol(info_a, 0x100, 10, "Foo");
+ for (auto addr : {0x100, 0x100 + 9}) {
+ const auto cached = cache.findSymbol(info_a, addr);
+ QVERIFY(cached.isValid());
+ QCOMPARE(cached.offset, 0);
+ QCOMPARE(cached.size, 10);
+ QCOMPARE(cached.symname, "Foo");
+ }
+ QVERIFY(!cache.findSymbol(info_a, 0x100 + 10).isValid());
+ QVERIFY(!cache.findSymbol(info_b, 0x100).isValid());
+ QVERIFY(!cache.findSymbol(info_b, 0x100 + 9).isValid());
+ }
};
QTEST_GUILESS_MAIN(TestAddressCache)