summaryrefslogtreecommitdiffstats
path: root/src/testlib/qbenchmarkperfevents.cpp
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2012-04-09 14:11:47 -0300
committerThe Qt Project <gerrit-noreply@qt-project.org>2013-03-06 21:50:28 +0100
commitc63420a117fe67107466d806890e901d091cb1d5 (patch)
tree63c706a5d6b50f9efcb0db95c1ec80f99432b7d8 /src/testlib/qbenchmarkperfevents.cpp
parent9d72259f943a3b31fa4e32aeb1c5a2de3d8ca611 (diff)
Implement the Linux Perf Counter backend for benchlib
Currently, we only support one event type: counting CPU cycles with hardware counters. There are no fallbacks if this hardware counter is not available, and there is currently no way to specify other counters. Benchlib only supports reporting one event per benchmark, even though the event counter interface allows specifying more than one. Still, the hardware usually has limitations on how many events it can monitor at a time, and we'd prefer to have the counter running at 100% of the time, so this will not change. Change-Id: I79858a3ad1e696dc4b7b72c420e5a04b67cd55de Reviewed-by: Jason McDonald <macadder1@gmail.com>
Diffstat (limited to 'src/testlib/qbenchmarkperfevents.cpp')
-rw-r--r--src/testlib/qbenchmarkperfevents.cpp105
1 files changed, 104 insertions, 1 deletions
diff --git a/src/testlib/qbenchmarkperfevents.cpp b/src/testlib/qbenchmarkperfevents.cpp
index 8c2a4852b4..178d4cc7e8 100644
--- a/src/testlib/qbenchmarkperfevents.cpp
+++ b/src/testlib/qbenchmarkperfevents.cpp
@@ -44,15 +44,41 @@
#ifdef QTESTLIB_USE_PERF_EVENTS
+// include the qcore_unix_p.h without core-private
+// we only use inline functions anyway
+#include "../corelib/kernel/qcore_unix_p.h"
+
#include <sys/types.h>
#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
#include <sys/syscall.h>
+#include <sys/ioctl.h>
#include "3rdparty/linux_perf_event_p.h"
QT_BEGIN_NAMESPACE
+/*!
+ \class QBenchmarkPerfEvents
+ \brief The Linux perf events benchmark backend
+
+ This benchmark backend uses the Linux Performance Counters interface,
+ introduced with the Linux kernel v2.6.31. The interface is done by one
+ system call (perf_event_open) which takes an attribute structure and
+ returns a file descriptor.
+
+ More information:
+ \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt>
+ \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c>
+ (note: as of v3.3.1, the documentation is out-of-date with the kernel
+ interface, so reading the source code of existing tools is necessary)
+
+ This benchlib backend monitors the current process as well as any child
+ processes it launches. We do not try to benchmark in kernel or hypervisor
+ mode, as that usually requires elevated privileges.
+ */
static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
@@ -67,27 +93,69 @@ bool QBenchmarkPerfEventsMeasurer::isAvailable()
}
/*!
    \internal
    Creates a measurer that does not hold a perf event descriptor yet.

    \c fd starts out as -1, meaning "nothing open"; init() is what assigns
    a real descriptor to it.
*/
QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer()
    : fd(-1)
{
}
/*!
    \internal
    Releases the perf event descriptor, if one was opened.
*/
QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer()
{
    // fd is still -1 when init() was never called; do not hand an invalid
    // descriptor to close() in that case.
    if (fd != -1)
        qt_safe_close(fd);
}
/*!
    \internal
    Opens the performance counter used by this measurer and stores its file
    descriptor in \c fd.

    The counter is created disabled; start() enables it. It is attached to
    the current process on all CPUs, and is set up to be inherited by child
    processes.

    If the kernel refuses the request for lack of privileges, a second
    attempt is made with kernel and hypervisor mode excluded. If the counter
    still cannot be opened, the error is printed and the process exits with
    status 1.
*/
void QBenchmarkPerfEventsMeasurer::init()
{
    perf_event_attr attr;
    // Zero everything first: fields not set below (sample_period,
    // sample_type, the exclude_* bits, ...) must be 0.
    memset(&attr, 0, sizeof attr);

    // common init
    attr.size = sizeof attr;
    attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
    attr.disabled = true;       // start disabled, we'll enable later
    attr.inherit = true;        // let children inherit, if the benchmark has child processes
    attr.pinned = true;         // keep it running on the PMU
    attr.inherit_stat = true;   // collapse all the info from child processes
    attr.task = true;           // trace fork and exit

    // our event type
    // ### FIXME hardcoded for now
    attr.type = PERF_TYPE_HARDWARE;
    attr.config = PERF_COUNT_HW_CPU_CYCLES;

    // pid == 0      -> attach to the current process
    // cpu == -1     -> monitor on all CPUs
    // group_fd == -1 -> this is the group leader
    // flags == 0    -> reserved, must be zero
    fd = perf_event_open(&attr, 0, -1, -1, 0);
    if (fd == -1 && (errno == EACCES || errno == EPERM)) {
        // Counting kernel/hypervisor mode usually needs elevated privileges
        // (see /proc/sys/kernel/perf_event_paranoid). Fall back to counting
        // user mode only instead of giving up.
        attr.exclude_kernel = true;
        attr.exclude_hv = true;
        fd = perf_event_open(&attr, 0, -1, -1, 0);
    }

    if (fd == -1) {
        perror("QBenchmarkPerfEventsMeasurer::init: perf_event_open");
        exit(1);
    }

    // Best effort: keep the descriptor from leaking into exec'ed programs.
    ::fcntl(fd, F_SETFD, FD_CLOEXEC);
}
/*!
    \internal
    Begins a measurement: issues a reset followed by an enable on the perf
    event descriptor.
*/
void QBenchmarkPerfEventsMeasurer::start()
{
    // Reset first so the measurement does not include anything counted
    // earlier, then switch the counter on.
    ::ioctl(fd, PERF_EVENT_IOC_RESET);
    ::ioctl(fd, PERF_EVENT_IOC_ENABLE);
}
/*!
    \internal
    Returns the counter value read at this point, leaving the counter
    enabled afterwards.
*/
qint64 QBenchmarkPerfEventsMeasurer::checkpoint()
{
    // Pause the counter around the read, then resume it.
    ::ioctl(fd, PERF_EVENT_IOC_DISABLE);
    const qint64 snapshot = readValue();
    ::ioctl(fd, PERF_EVENT_IOC_ENABLE);
    return snapshot;
}
/*!
    \internal
    Ends a measurement: disables the counter and returns the value read
    from it. Unlike checkpoint(), the counter is left disabled.
*/
qint64 QBenchmarkPerfEventsMeasurer::stop()
{
    ::ioctl(fd, PERF_EVENT_IOC_DISABLE);
    const qint64 total = readValue();
    return total;
}
bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(qint64)
@@ -110,6 +178,41 @@ QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricType()
return QTest::Events;
}
-#endif
+qint64 QBenchmarkPerfEventsMeasurer::readValue()
+{
+ /* from the kernel docs:
+ * struct read_format {
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
+ * { u64 id; } && PERF_FORMAT_ID
+ * } && !PERF_FORMAT_GROUP
+ */
+
+ struct read_format {
+ quint64 value;
+ quint64 time_enabled;
+ quint64 time_running;
+ } results;
+
+ size_t nread = 0;
+ while (nread < sizeof results) {
+ char *ptr = reinterpret_cast<char *>(&results);
+ qint64 r = qt_safe_read(fd, ptr + nread, sizeof results - nread);
+ if (r == -1) {
+ perror("QBenchmarkPerfEventsMeasurer::readValue: reading the results");
+ exit(1);
+ }
+ nread += quint64(r);
+ }
+
+ if (results.time_running == results.time_enabled)
+ return results.value;
+
+ // scale the results, though this shouldn't happen!
+ return results.value * (double(results.time_running) / double(results.time_enabled));
+}
QT_END_NAMESPACE
+
+#endif