summary refs log tree commit diff stats
path: root/tests
diff options
context:
space:
mode:
author Milian Wolff <milian.wolff@kdab.com> 2020-01-06 09:24:34 +0100
committer Milian Wolff <milian.wolff@kdab.com> 2020-01-09 19:24:03 +0000
commit 807cccbb89d58da774c73e9bc3c1bddc6e8e653d (patch)
tree 48051cc5b66a22f7c58ffd66252e4dee517c78ea /tests
parent 37dce9049204ea8ed420686cb407aad0be3d7950 (diff)
Share per-DSO address cache across processes
When we profile a multi-process ensemble, it will often happen that we encounter samples at the relative address of a DSO. In such cases, we can leverage a central cache to store the information, instead of recomputing the same data for every process.

As an example, I wrote a shell script that runs the same process four times in parallel. When I parse the resulting perf.data file, the perf stat results are as follows:

before:
```
Performance counter stats for '/home/milian/projects/compiled/other/lib/libexec/hotspot-perfparser --input ./perf.data --output /dev/null':
4.240,50 msec task-clock:u # 0,956 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
17.389 page-faults:u # 0,004 M/sec
11.195.771.907 cycles:u # 2,640 GHz
26.585.168.652 instructions:u # 2,37 insn per cycle
6.234.491.027 branches:u # 1470,227 M/sec
35.149.387 branch-misses:u # 0,56% of all branches
4,435152034 seconds time elapsed
3,732758000 seconds user
0,490148000 seconds sys
```

after:
```
Performance counter stats for '/home/milian/projects/compiled/other/lib/libexec/hotspot-perfparser --input ./perf.data --output /dev/null':
4.160,90 msec task-clock:u # 0,979 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
15.476 page-faults:u # 0,004 M/sec
10.635.798.451 cycles:u # 2,556 GHz
16.616.035.720 instructions:u # 1,56 insn per cycle
3.838.148.777 branches:u # 922,433 M/sec
24.902.558 branch-misses:u # 0,65% of all branches
4,249408917 seconds time elapsed
3,612442000 seconds user
0,533933000 seconds sys
```

Note that the overall elapsed time doesn't change that much here, but the number of instructions required is massively reduced. I bet there are other situations where this patch will bring a more tangible improvement to the overall time requirement.

Change-Id: I4531ec648af40dd44b9e4290fab7bbd2a89609da
Reviewed-by: Ulf Hermann <ulf.hermann@qt.io>
Diffstat (limited to 'tests')
-rw-r--r-- tests/auto/addresscache/tst_addresscache.cpp | 17
1 files changed, 11 insertions, 6 deletions
diff --git a/tests/auto/addresscache/tst_addresscache.cpp b/tests/auto/addresscache/tst_addresscache.cpp
index 2086410..604274e 100644
--- a/tests/auto/addresscache/tst_addresscache.cpp
+++ b/tests/auto/addresscache/tst_addresscache.cpp
@@ -37,24 +37,29 @@ private slots:
info_b.addr = 0x200;
PerfAddressCache cache;
+ PerfAddressCache::OffsetAddressCache invalidAddressCache;
PerfAddressCache::AddressCacheEntry entry{42, true};
- cache.cache(info_a, 0x110, entry);
- QCOMPARE(cache.find(info_a, 0x110).locationId, entry.locationId);
- QCOMPARE(cache.find(info_b, 0x210).locationId, entry.locationId);
+ cache.cache(info_a, 0x110, entry, &invalidAddressCache);
+ QCOMPARE(cache.find(info_a, 0x110, &invalidAddressCache).locationId, entry.locationId);
+ QCOMPARE(cache.find(info_b, 0x210, &invalidAddressCache).locationId, entry.locationId);
}
void testInvalid()
{
PerfAddressCache cache;
+ PerfAddressCache::OffsetAddressCache invalidAddressCache_a;
+ PerfAddressCache::OffsetAddressCache invalidAddressCache_b;
PerfAddressCache::AddressCacheEntry entry{42, true};
- cache.cache(PerfElfMap::ElfInfo{}, 0x110, entry);
- QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x110).locationId, entry.locationId);
+ cache.cache(PerfElfMap::ElfInfo{}, 0x110, entry, &invalidAddressCache_a);
+ QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x110, &invalidAddressCache_a).locationId, entry.locationId);
+ QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x110, &invalidAddressCache_b).locationId, -1);
}
void testEmpty()
{
PerfAddressCache cache;
- QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x123).locationId, -1);
+ PerfAddressCache::OffsetAddressCache invalidAddressCache;
+ QCOMPARE(cache.find(PerfElfMap::ElfInfo{}, 0x123, &invalidAddressCache).locationId, -1);
}
};