summaryrefslogtreecommitdiffstats
path: root/tests/auto/addresscache/tst_addresscache.cpp
diff options
context:
space:
mode:
authorMilian Wolff <milian.wolff@kdab.com>2020-07-01 22:12:05 +0200
committerMilian Wolff <milian.wolff@kdab.com>2020-09-11 10:31:19 +0000
commitd8d56b7e45cf676a50c5ab14bba68a79450854ab (patch)
treecf368abe32a04a8eb6290cbe8fc6caeeb0b3e7e2 /tests/auto/addresscache/tst_addresscache.cpp
parentf1dc7259cbdc12b94d4ce2395ce0da019c4901f0 (diff)
Build a sorted symbol map once and use it for lookups
Apparently the symtab isn't necessarily sorted. That means every call to dwfl_module_addrinfo we do may potentially end up iterating over many symbol entries to find a match. For libxul.so, which contains roughly one million symbols, this can be exceedingly slow. Now, we instead just iterate over all symbols once and store them in a sorted array for quick lookups later on. Then when we lookup a symbol, we just need to demangle it on-demand. I believe the following numbers speak for themselves. Both are for a 1.1GB perf.data file profiling firefox with debug symbols in libxul. A hefty 10x speedup! Before: ``` 592.765,37 msec task-clock:u # 0,999 CPUs utilized 0 context-switches:u # 0,000 K/sec 0 cpu-migrations:u # 0,000 K/sec 587.913 page-faults:u # 0,992 K/sec 2.610.836.174.604 cycles:u # 4,405 GHz (83,33%) 9.249.001.490 stalled-cycles-frontend:u # 0,35% frontend cycles idle (83,33%) 188.323.380.515 stalled-cycles-backend:u # 7,21% backend cycles idle (83,33%) 6.294.821.871.279 instructions:u # 2,41 insn per cycle # 0,03 stalled cycles per insn (83,33%) 1.593.493.508.805 branches:u # 2688,236 M/sec (83,33%) 1.613.875.121 branch-misses:u # 0,10% of all branches (83,34%) 593,078170383 seconds time elapsed 589,590379000 seconds user 1,591781000 seconds sys ``` After: ``` 57.292,74 msec task-clock:u # 0,999 CPUs utilized 0 context-switches:u # 0,000 K/sec 0 cpu-migrations:u # 0,000 K/sec 598.808 page-faults:u # 0,010 M/sec 246.209.111.444 cycles:u # 4,297 GHz (83,34%) 8.990.996.482 stalled-cycles-frontend:u # 3,65% frontend cycles idle (83,33%) 52.443.604.272 stalled-cycles-backend:u # 21,30% backend cycles idle (83,34%) 583.136.689.772 instructions:u # 2,37 insn per cycle # 0,09 stalled cycles per insn (83,33%) 150.053.278.261 branches:u # 2619,063 M/sec (83,33%) 833.143.959 branch-misses:u # 0,56% of all branches (83,34%) 57,370841100 seconds time elapsed 55,799767000 seconds user 1,291568000 seconds sys ``` Note that this patch also uncovers some broken offset computations. Checking the offsets manually with addr2line indicates that the new offsets we report now are better than the old ones. At least for the cases I compared, e.g.: ``` $ addr2line -C -i -f -e .../fork -a 255800 22c800 0x0000000000255800 __vfwprintf_internal fork.c:? 0x000000000022c800 __cos_fma ??:? $ addr2line -C -i -f -e .../fork -a 252585 229585 0x0000000000252585 printf_positional fork.c:? 0x0000000000229585 main ??:? $ addr2line -C -i -f -e .../vector_static_gcc_v9.1.0 -a 45d3e0 0x000000000045d3e0 __munmap crtstuff.c:? ``` Then, we now resolve symbols like binutils, i.e. we pick the first symbol we find and don't skip weak symbols like eu-addr2line seems to be doing. I.e. for this: ``` 0000000000417a40 w F .text 0000000000000074 hypot 0000000000417a40 w F .text 0000000000000074 hypotf64 0000000000417a40 w F .text 0000000000000074 hypotf32x 0000000000417a40 g F .text 0000000000000074 __hypot ``` We used to get `__hypot`, but now we get `hypot`. I think this is just as good, and as I said - it's also what you'd get from binutils with `addr2line`: ``` $ addr2line -C -i -f -e vector_static_gcc_v9.1.0 -a 417a40 0x0000000000418480 hypot ??:? $ eu-addr2line -C -i -f -e vector_static_gcc_v9.1.0 -a 417a40 0x0000000000418480 __hypot ??:0 ``` Initially, I thought about just skipping all weak symbols, but that's not a feasible approach. There are some symbols that are weak and not overridden by a non-weak symbol, like this one: ``` $ objdump -C -t .../vector_static_gcc_v9.1.0 | grep 401c70 0000000000401c70 w F .text 0000000000000162 void std::vector<double, std::allocator<double> >::_M_realloc_insert<double>(__gnu_cxx::__normal_iterator<double*, std::vector<double, std::allocator<double> > >, double&&) ``` And that one even contains a bunch of inlined frames, so we definitely want to keep that in. We could potentially pass that information along and then implement a custom logic to prefer non-weak symbols. Quite frankly, I don't think that effort is worth it. Change-Id: Ic91764aaab36e77be1c4df4a32d4ac2b4c28e7e0 Reviewed-by: Ulf Hermann <ulf.hermann@qt.io>
Diffstat (limited to 'tests/auto/addresscache/tst_addresscache.cpp')
-rw-r--r--tests/auto/addresscache/tst_addresscache.cpp38
1 files changed, 11 insertions, 27 deletions
diff --git a/tests/auto/addresscache/tst_addresscache.cpp b/tests/auto/addresscache/tst_addresscache.cpp
index 5df3ae4..8136682 100644
--- a/tests/auto/addresscache/tst_addresscache.cpp
+++ b/tests/auto/addresscache/tst_addresscache.cpp
@@ -64,41 +64,25 @@ private slots:
void testSymbolCache()
{
- PerfElfMap::ElfInfo info_a{{}, 0x100, 100, 0,
- QByteArrayLiteral("libfoo_a.so"),
- QByteArrayLiteral("/usr/lib/libfoo_a.so")};
- PerfElfMap::ElfInfo info_a_offset{{}, 0x200, 100, 0x100,
- QByteArrayLiteral("libfoo_a.so"),
- QByteArrayLiteral("/usr/lib/libfoo_a.so")};
- info_a_offset.baseAddr = info_a.addr;
- PerfElfMap::ElfInfo info_b{{}, 0x100, 100, 0,
- QByteArrayLiteral("libfoo_b.so"),
- QByteArrayLiteral("/usr/lib/libfoo_b.so")};
+ const auto libfoo_a = QByteArrayLiteral("/usr/lib/libfoo_a.so");
+ const auto libfoo_b = QByteArrayLiteral("/usr/lib/libfoo_b.so");
PerfAddressCache cache;
- QVERIFY(!cache.findSymbol(info_a, 0x100).isValid());
- QVERIFY(!cache.findSymbol(info_a_offset, 0x100).isValid());
- QVERIFY(!cache.findSymbol(info_a_offset, 0x200).isValid());
- QVERIFY(!cache.findSymbol(info_b, 0x100).isValid());
+ QVERIFY(!cache.findSymbol(libfoo_a, 0).isValid());
+ QVERIFY(!cache.findSymbol(libfoo_b, 0).isValid());
- cache.cacheSymbol(info_a, 0x100, 10, "Foo");
+ cache.setSymbolCache(libfoo_a, {{0x100, 10, "Foo"}});
for (auto addr : {0x100, 0x100 + 9}) {
- const auto cached = cache.findSymbol(info_a, addr);
+ const auto cached = cache.findSymbol(libfoo_a, addr);
QVERIFY(cached.isValid());
- QCOMPARE(int(cached.offset), 0);
- QCOMPARE(int(cached.size), 10);
+ QCOMPARE(cached.offset, 0x100);
+ QCOMPARE(cached.size, 10);
QCOMPARE(cached.symname, "Foo");
- const auto cached2 = cache.findSymbol(info_a_offset, addr);
- QCOMPARE(cached2.isValid(), cached.isValid());
- QCOMPARE(cached2.offset, cached.offset);
- QCOMPARE(cached2.size, cached.size);
- QCOMPARE(cached2.symname, cached.symname);
}
- QVERIFY(!cache.findSymbol(info_a, 0x100 + 10).isValid());
- QVERIFY(!cache.findSymbol(info_a_offset, 0x100 + 10).isValid());
- QVERIFY(!cache.findSymbol(info_b, 0x100).isValid());
- QVERIFY(!cache.findSymbol(info_b, 0x100 + 9).isValid());
+ QVERIFY(!cache.findSymbol(libfoo_a, 0x100 + 10).isValid());
+ QVERIFY(!cache.findSymbol(libfoo_b, 0x100).isValid());
+ QVERIFY(!cache.findSymbol(libfoo_b, 0x100 + 9).isValid());
}
};