diff options
author | Milian Wolff <milian.wolff@kdab.com> | 2020-01-06 09:24:34 +0100 |
---|---|---|
committer | Milian Wolff <milian.wolff@kdab.com> | 2020-01-09 19:24:03 +0000 |
commit | 807cccbb89d58da774c73e9bc3c1bddc6e8e653d (patch) | |
tree | 48051cc5b66a22f7c58ffd66252e4dee517c78ea /tests | |
parent | 37dce9049204ea8ed420686cb407aad0be3d7950 (diff) |
Share per-DSO address cache across processes
When we profile a multi-process ensemble, it will often happen that
we encounter samples at the same relative address of a DSO in more than
one process. In such cases, we can leverage a central cache to store the
information, instead of recomputing the same data for every process.
As an example, I wrote a shell script that runs the same process four
times in parallel. When I parse the resulting perf.data file, the perf
stat results are as follows:
before:
```
Performance counter stats for '/home/milian/projects/compiled/other/lib/libexec/hotspot-perfparser --input ./perf.data --output /dev/null':
4.240,50 msec task-clock:u # 0,956 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
17.389 page-faults:u # 0,004 M/sec
11.195.771.907 cycles:u # 2,640 GHz
26.585.168.652 instructions:u # 2,37 insn per cycle
6.234.491.027 branches:u # 1470,227 M/sec
35.149.387 branch-misses:u # 0,56% of all branches
4,435152034 seconds time elapsed
3,732758000 seconds user
0,490148000 seconds sys
```
after:
```
Performance counter stats for '/home/milian/projects/compiled/other/lib/libexec/hotspot-perfparser --input ./perf.data --output /dev/null':
4.160,90 msec task-clock:u # 0,979 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
15.476 page-faults:u # 0,004 M/sec
10.635.798.451 cycles:u # 2,556 GHz
16.616.035.720 instructions:u # 1,56 insn per cycle
3.838.148.777 branches:u # 922,433 M/sec
24.902.558 branch-misses:u # 0,65% of all branches
4,249408917 seconds time elapsed
3,612442000 seconds user
0,533933000 seconds sys
```
Note that the overall elapsed time doesn't change that much here,
but the number of instructions required is massively reduced. I bet
there are other situations where this patch will bring a more tangible
improvement to the overall time requirement.
Change-Id: I4531ec648af40dd44b9e4290fab7bbd2a89609da
Reviewed-by: Ulf Hermann <ulf.hermann@qt.io>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/auto/addresscache/tst_addresscache.cpp | 17 |
1 files changed, 11 insertions, 6 deletions
diff --git a/tests/auto/addresscache/tst_addresscache.cpp b/tests/auto/addresscache/tst_addresscache.cpp index 2086410..604274e 100644 --- a/tests/auto/addresscache/tst_addresscache.cpp +++ b/tests/auto/addresscache/tst_addresscache.cpp @@ -37,24 +37,29 @@ private slots: info_b.addr = 0x200; PerfAddressCache cache; + PerfAddressCache::OffsetAddressCache invalidAddressCache; PerfAddressCache::AddressCacheEntry entry{42, true}; - cache.cache(info_a, 0x110, entry); - QCOMPARE(cache.find(info_a, 0x110).locationId, entry.locationId); - QCOMPARE(cache.find(info_b, 0x210).locationId, entry.locationId); + cache.cache(info_a, 0x110, entry, &invalidAddressCache); + QCOMPARE(cache.find(info_a, 0x110, &invalidAddressCache).locationId, entry.locationId); + QCOMPARE(cache.find(info_b, 0x210, &invalidAddressCache).locationId, entry.locationId); } void testInvalid() { PerfAddressCache cache; + PerfAddressCache::OffsetAddressCache invalidAddressCache_a; + PerfAddressCache::OffsetAddressCache invalidAddressCache_b; PerfAddressCache::AddressCacheEntry entry{42, true}; - cache.cache(PerfElfMap::ElfInfo{}, 0x110, entry); - QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x110).locationId, entry.locationId); + cache.cache(PerfElfMap::ElfInfo{}, 0x110, entry, &invalidAddressCache_a); + QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x110, &invalidAddressCache_a).locationId, entry.locationId); + QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x110, &invalidAddressCache_b).locationId, -1); } void testEmpty() { PerfAddressCache cache; - QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x123).locationId, -1); + PerfAddressCache::OffsetAddressCache invalidAddressCache; + QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x123, &invalidAddressCache).locationId, -1); } }; |