diff options
Diffstat (limited to 'src/testlib/qbenchmarkperfevents.cpp')
-rw-r--r-- | src/testlib/qbenchmarkperfevents.cpp | 607 |
1 files changed, 607 insertions, 0 deletions
diff --git a/src/testlib/qbenchmarkperfevents.cpp b/src/testlib/qbenchmarkperfevents.cpp new file mode 100644 index 0000000000..e3034d1f94 --- /dev/null +++ b/src/testlib/qbenchmarkperfevents.cpp @@ -0,0 +1,607 @@ +/**************************************************************************** +** +** Copyright (C) 2013 Intel Corporation. +** Contact: http://www.qt-project.org/legal +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and Digia. For licensing terms and +** conditions see http://qt.digia.com/licensing. For further information +** use the contact form at http://qt.digia.com/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Digia gives you certain additional +** rights. These rights are described in the Digia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3.0 as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU General Public License version 3.0 requirements will be +** met: http://www.gnu.org/copyleft/gpl.html. +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qbenchmarkperfevents_p.h" +#include "qbenchmarkmetric.h" +#include "qbenchmark_p.h" + +#ifdef QTESTLIB_USE_PERF_EVENTS + +// include the qcore_unix_p.h without core-private +// we only use inline functions anyway +#include "../corelib/kernel/qcore_unix_p.h" + +#include <sys/types.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <stdio.h> + +#include <sys/syscall.h> +#include <sys/ioctl.h> + +#include "3rdparty/linux_perf_event_p.h" + +// for PERF_TYPE_HW_CACHE, the config is a bitmask +// lowest 8 bits: cache type +// bits 8 to 15: cache operation +// bits 16 to 23: cache result +#define CACHE_L1D_READ (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) +#define CACHE_L1D_WRITE (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_WRITE << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) +#define CACHE_L1D_PREFETCH (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) +#define CACHE_L1I_READ (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) +#define CACHE_L1I_PREFETCH (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) +#define CACHE_LLC_READ (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) +#define CACHE_LLC_WRITE (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_WRITE << 8| PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) +#define CACHE_LLC_PREFETCH (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) +#define CACHE_L1D_READ_MISS (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) +#define CACHE_L1D_WRITE_MISS (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_WRITE << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) +#define CACHE_L1D_PREFETCH_MISS (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) +#define CACHE_L1I_READ_MISS (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) +#define CACHE_L1I_PREFETCH_MISS (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) +#define CACHE_LLC_READ_MISS (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) +#define CACHE_LLC_WRITE_MISS (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_WRITE << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) +#define CACHE_LLC_PREFETCH_MISS (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) +#define CACHE_BRANCH_READ (PERF_COUNT_HW_CACHE_BPU | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) +#define CACHE_BRANCH_READ_MISS (PERF_COUNT_HW_CACHE_BPU | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16) + +QT_BEGIN_NAMESPACE + +static perf_event_attr attr; + +static void initPerf() +{ + static bool done; + if (!done) { + memset(&attr, 0, sizeof attr); + attr.size = sizeof attr; + attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; + attr.disabled = true; // we'll enable later + attr.inherit = true; // let children processes inherit the monitoring + attr.pinned = true; // keep it running in the hardware + attr.inherit_stat = true; // aggregate all the info from child processes + attr.task = true; // trace fork/exits + + // set a default performance counter: CPU cycles + attr.type = PERF_TYPE_HARDWARE; + attr.config = PERF_COUNT_HW_CPU_CYCLES; // default + + done = true; + } +} + +/*! + \class QBenchmarkPerfEvents + \brief The Linux perf events benchmark backend + + This benchmark backend uses the Linux Performance Counters interface, + introduced with the Linux kernel v2.6.31. The interface is done by one + system call (perf_event_open) which takes an attribute structure and + returns a file descriptor. + + More information: + \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt> + \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c> + (note: as of v3.3.1, the documentation is out-of-date with the kernel + interface, so reading the source code of existing tools is necessary) + + This benchlib backend monitors the current process as well as child process + launched. We do not try to benchmark in kernel or hypervisor mode, as that + usually requires elevated privileges. + */ + +static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) +{ + return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags); +} + +bool QBenchmarkPerfEventsMeasurer::isAvailable() +{ + // this generates an EFAULT because attr == NULL if perf_event_open is available + // if the kernel is too old, it generates ENOSYS + return perf_event_open(0, 0, 0, 0, 0) == -1 && errno != ENOSYS; +} + +/* Event list structure + The following table provides the list of supported events + + Event type Event counter Unit Name and aliases + HARDWARE CPU_CYCLES CPUCycles cycles cpu-cycles + HARDWARE INSTRUCTIONS Instructions instructions + HARDWARE CACHE_REFERENCES CacheReferences cache-references + HARDWARE CACHE_MISSES CacheMisses cache-misses + HARDWARE BRANCH_INSTRUCTIONS BranchInstructions branch-instructions branches + HARDWARE BRANCH_MISSES BranchMisses branch-misses + HARDWARE BUS_CYCLES BusCycles bus-cycles + HARDWARE STALLED_CYCLES_FRONTEND StalledCycles stalled-cycles-frontend idle-cycles-frontend + HARDWARE STALLED_CYCLES_BACKEND StalledCycles stalled-cycles-backend idle-cycles-backend + SOFTWARE CPU_CLOCK WalltimeMilliseconds cpu-clock + SOFTWARE TASK_CLOCK WalltimeMilliseconds task-clock + SOFTWARE PAGE_FAULTS PageFaults page-faults faults + SOFTWARE PAGE_FAULTS_MAJ MajorPageFaults major-faults + SOFTWARE PAGE_FAULTS_MIN MinorPageFaults minor-faults + SOFTWARE CONTEXT_SWITCHES ContextSwitches context-switches cs + SOFTWARE CPU_MIGRATIONS CPUMigrations cpu-migrations migrations + SOFTWARE ALIGNMENT_FAULTS AlignmentFaults alignment-faults + SOFTWARE EMULATION_FAULTS EmulationFaults emulation-faults + HW_CACHE L1D_READ CacheReads l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads + HW_CACHE L1D_WRITE CacheWrites l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores + HW_CACHE L1D_PREFETCH CachePrefetches l1d-cache-prefetches l1d-prefetches + HW_CACHE L1I_READ CacheReads l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads + HW_CACHE L1I_PREFETCH CachePrefetches l1i-cache-prefetches l1i-prefetches + HW_CACHE LLC_READ CacheReads llc-cache-reads llc-cache-loads llc-loads llc-reads + HW_CACHE LLC_WRITE CacheWrites llc-cache-writes llc-cache-stores llc-writes llc-stores + HW_CACHE LLC_PREFETCH CachePrefetches llc-cache-prefetches llc-prefetches + HW_CACHE L1D_READ_MISS CacheReads l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses + HW_CACHE L1D_WRITE_MISS CacheWrites l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses + HW_CACHE L1D_PREFETCH_MISS CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses + HW_CACHE L1I_READ_MISS CacheReads l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses + HW_CACHE L1I_PREFETCH_MISS CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses + HW_CACHE LLC_READ_MISS CacheReads llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses + HW_CACHE LLC_WRITE_MISS CacheWrites llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses + HW_CACHE LLC_PREFETCH_MISS CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses + HW_CACHE BRANCH_READ BranchInstructions branch-reads branch-loads branch-predicts + HW_CACHE BRANCH_READ_MISS BranchMisses branch-mispredicts branch-read-misses branch-load-misses + + Use the following Perl script to re-generate the list +=== cut perl === +#!/usr/bin/env perl +# Load all entries into %map +while (<STDIN>) { + m/^\s*(.*)\s*$/; + @_ = split /\s+/, $1; + $type = shift @_; + $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" : + $type eq "SOFTWARE" ? "PERF_COUNT_SW_" : + $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_; + $unit = shift @_; + + for $string (@_) { + die "$string was already seen!" if defined($map{$string}); + $map{$string} = [-1, $type, $id, $unit]; + push @strings, $string; + } +} + +# sort the map and print the string list +@strings = sort @strings; +print "static const char eventlist_strings[] = \n"; +$counter = 0; +for $entry (@strings) { + print " \"$entry\\0\"\n"; + $map{$entry}[0] = $counter; + $counter += 1 + length $entry; +} + +# print the table +print " \"\\0\";\n\nstatic const Events eventlist[] = {\n"; +for $entry (sort @strings) { + printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n", + $map{$entry}[0], + $map{$entry}[1], + $map{$entry}[2], + $map{$entry}[3]; +} +print " { 0, PERF_TYPE_MAX, 0, QTest::Events }\n};\n"; +=== cut perl === +*/ + +struct Events { + unsigned offset; + quint32 type; + quint64 event_id; + QTest::QBenchmarkMetric metric; +}; + +/* -- BEGIN GENERATED CODE -- */ +static const char eventlist_strings[] = + "alignment-faults\0" + "branch-instructions\0" + "branch-load-misses\0" + "branch-loads\0" + "branch-mispredicts\0" + "branch-misses\0" + "branch-predicts\0" + "branch-read-misses\0" + "branch-reads\0" + "branches\0" + "bus-cycles\0" + "cache-misses\0" + "cache-references\0" + "context-switches\0" + "cpu-clock\0" + "cpu-cycles\0" + "cpu-migrations\0" + "cs\0" + "cycles\0" + "emulation-faults\0" + "faults\0" + "idle-cycles-backend\0" + "idle-cycles-frontend\0" + "instructions\0" + "l1d-cache-load-misses\0" + "l1d-cache-loads\0" + "l1d-cache-prefetch-misses\0" + "l1d-cache-prefetches\0" + "l1d-cache-read-misses\0" + "l1d-cache-reads\0" + "l1d-cache-store-misses\0" + "l1d-cache-stores\0" + "l1d-cache-write-misses\0" + "l1d-cache-writes\0" + "l1d-load-misses\0" + "l1d-loads\0" + "l1d-prefetch-misses\0" + "l1d-prefetches\0" + "l1d-read-misses\0" + "l1d-reads\0" + "l1d-store-misses\0" + "l1d-stores\0" + "l1d-write-misses\0" + "l1d-writes\0" + "l1i-cache-load-misses\0" + "l1i-cache-loads\0" + "l1i-cache-prefetch-misses\0" + "l1i-cache-prefetches\0" + "l1i-cache-read-misses\0" + "l1i-cache-reads\0" + "l1i-load-misses\0" + "l1i-loads\0" + "l1i-prefetch-misses\0" + "l1i-prefetches\0" + "l1i-read-misses\0" + "l1i-reads\0" + "llc-cache-load-misses\0" + "llc-cache-loads\0" + "llc-cache-prefetch-misses\0" + "llc-cache-prefetches\0" + "llc-cache-read-misses\0" + "llc-cache-reads\0" + "llc-cache-store-misses\0" + "llc-cache-stores\0" + "llc-cache-write-misses\0" + "llc-cache-writes\0" + "llc-load-misses\0" + "llc-loads\0" + "llc-prefetch-misses\0" + "llc-prefetches\0" + "llc-read-misses\0" + "llc-reads\0" + "llc-store-misses\0" + "llc-stores\0" + "llc-write-misses\0" + "llc-writes\0" + "major-faults\0" + "migrations\0" + "minor-faults\0" + "page-faults\0" + "stalled-cycles-backend\0" + "stalled-cycles-frontend\0" + "task-clock\0" + "\0"; + +static const Events eventlist[] = { + { 0, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, QTest::AlignmentFaults }, + { 17, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions }, + { 37, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses }, + { 56, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions }, + { 69, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses }, + { 88, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, QTest::BranchMisses }, + { 102, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions }, + { 118, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses }, + { 137, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions }, + { 150, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions }, + { 159, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, QTest::BusCycles }, + { 170, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, QTest::CacheMisses }, + { 183, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, QTest::CacheReferences }, + { 200, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches }, + { 217, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, QTest::WalltimeMilliseconds }, + { 227, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles }, + { 238, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations }, + { 253, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches }, + { 256, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles }, + { 263, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, QTest::EmulationFaults }, + { 280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults }, + { 287, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles }, + { 307, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles }, + { 328, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, QTest::Instructions }, + { 341, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads }, + { 363, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads }, + { 379, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches }, + { 405, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches }, + { 426, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads }, + { 448, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads }, + { 464, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites }, + { 487, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites }, + { 504, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites }, + { 527, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites }, + { 544, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads }, + { 560, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads }, + { 570, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches }, + { 590, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches }, + { 605, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads }, + { 621, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads }, + { 631, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites }, + { 648, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites }, + { 659, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites }, + { 676, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites }, + { 687, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads }, + { 709, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads }, + { 725, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches }, + { 751, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches }, + { 772, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads }, + { 794, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads }, + { 810, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads }, + { 826, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads }, + { 836, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches }, + { 856, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches }, + { 871, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads }, + { 887, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads }, + { 897, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads }, + { 919, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads }, + { 935, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches }, + { 961, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches }, + { 982, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads }, + { 1004, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads }, + { 1020, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites }, + { 1043, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites }, + { 1060, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites }, + { 1083, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites }, + { 1100, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads }, + { 1116, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads }, + { 1126, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches }, + { 1146, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches }, + { 1161, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads }, + { 1177, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads }, + { 1187, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites }, + { 1204, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites }, + { 1215, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites }, + { 1232, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites }, + { 1243, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, QTest::MajorPageFaults }, + { 1256, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations }, + { 1267, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, QTest::MinorPageFaults }, + { 1280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults }, + { 1292, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles }, + { 1315, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles }, + { 1339, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, QTest::WalltimeMilliseconds }, + { 0, PERF_TYPE_MAX, 0, QTest::Events } +}; +/* -- END GENERATED CODE -- */ + +QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricForEvent(quint32 type, quint64 event_id) +{ + const Events *ptr = eventlist; + for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { + if (ptr->type == type && ptr->event_id == event_id) + return ptr->metric; + } + return QTest::Events; +} + +void QBenchmarkPerfEventsMeasurer::setCounter(const char *name) +{ + initPerf(); + const char *colon = strchr(name, ':'); + int n = colon ? colon - name : strlen(name); + const Events *ptr = eventlist; + for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { + int c = strncmp(name, eventlist_strings + ptr->offset, n); + if (c == 0) + break; + if (c < 0) { + fprintf(stderr, "ERROR: Performance counter type '%s' is unknown\n", name); + exit(1); + } + } + + attr.type = ptr->type; + attr.config = ptr->event_id; + + // now parse the attributes + if (!colon) + return; + while (*++colon) { + switch (*colon) { + case 'u': + attr.exclude_user = true; + break; + case 'k': + attr.exclude_kernel = true; + break; + case 'h': + attr.exclude_hv = true; + break; + case 'G': + attr.exclude_guest = true; + break; + case 'H': + attr.exclude_host = true; + break; + default: + fprintf(stderr, "ERROR: Unknown attribute '%c'\n", *colon); + exit(1); + } + } +} + +void QBenchmarkPerfEventsMeasurer::listCounters() +{ + if (!isAvailable()) { + printf("Performance counters are not available on this system\n"); + return; + } + + printf("The following performance counters are available:\n"); + const Events *ptr = eventlist; + for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) { + printf(" %-30s [%s]\n", eventlist_strings + ptr->offset, + ptr->type == PERF_TYPE_HARDWARE ? "hardware" : + ptr->type == PERF_TYPE_SOFTWARE ? "software" : + ptr->type == PERF_TYPE_HW_CACHE ? "cache" : "other"); + } + + printf("\nAttributes can be specified by adding a colon and the following:\n" + " u - exclude measuring in the userspace\n" + " k - exclude measuring in kernel mode\n" + " h - exclude measuring in the hypervisor\n" + " G - exclude measuring when running virtualized (guest VM)\n" + " H - exclude measuring when running non-virtualized (host system)\n" + "Attributes can be combined, for example: -perfcounter branch-mispredicts:kh\n"); +} + +QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() + : fd(-1) +{ +} + +QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer() +{ + qt_safe_close(fd); +} + +void QBenchmarkPerfEventsMeasurer::init() +{ +} + +void QBenchmarkPerfEventsMeasurer::start() +{ + + initPerf(); + // pid == 0 -> attach to the current process + // cpu == -1 -> monitor on all CPUs + // group_fd == -1 -> this is the group leader + // flags == 0 -> reserved, must be zero + fd = perf_event_open(&attr, 0, -1, -1, 0); + if (fd == -1) { + perror("QBenchmarkPerfEventsMeasurer::start: perf_event_open"); + exit(1); + } else { + ::fcntl(fd, F_SETFD, FD_CLOEXEC); + } + + // enable the counter + ::ioctl(fd, PERF_EVENT_IOC_RESET); + ::ioctl(fd, PERF_EVENT_IOC_ENABLE); +} + +qint64 QBenchmarkPerfEventsMeasurer::checkpoint() +{ + ::ioctl(fd, PERF_EVENT_IOC_DISABLE); + qint64 value = readValue(); + ::ioctl(fd, PERF_EVENT_IOC_ENABLE); + return value; +} + +qint64 QBenchmarkPerfEventsMeasurer::stop() +{ + // disable the counter + ::ioctl(fd, PERF_EVENT_IOC_DISABLE); + return readValue(); +} + +bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(qint64) +{ + return true; +} + +int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int) +{ + return 1; +} + +int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int) +{ + return 1; +} + +QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricType() +{ + return metricForEvent(attr.type, attr.config); +} + +static quint64 rawReadValue(int fd) +{ + /* from the kernel docs: + * struct read_format { + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP + */ + + struct read_format { + quint64 value; + quint64 time_enabled; + quint64 time_running; + } results; + + size_t nread = 0; + while (nread < sizeof results) { + char *ptr = reinterpret_cast<char *>(&results); + qint64 r = qt_safe_read(fd, ptr + nread, sizeof results - nread); + if (r == -1) { + perror("QBenchmarkPerfEventsMeasurer::readValue: reading the results"); + exit(1); + } + nread += quint64(r); + } + + if (results.time_running == results.time_enabled) + return results.value; + + // scale the results, though this shouldn't happen! + return results.value * (double(results.time_running) / double(results.time_enabled)); +} + +qint64 QBenchmarkPerfEventsMeasurer::readValue() +{ + quint64 raw = rawReadValue(fd); + if (metricType() == QTest::WalltimeMilliseconds) { + // perf returns nanoseconds + return raw / 1000000; + } + return raw; +} + +QT_END_NAMESPACE + +#endif |