summaryrefslogtreecommitdiffstats
path: root/app
diff options
context:
space:
mode:
authorMilian Wolff <milian.wolff@kdab.com>2017-12-22 13:38:38 +0100
committerUlf Hermann <ulf.hermann@qt.io>2019-05-02 15:26:26 +0000
commitfcbbc909654fe2da4cec5e9b3e527c05bff5c4c0 (patch)
treeb70fa14e2e39ccc61bd258545f64860c569eca44 /app
parent05f20d7926f3b4ed38697b962535c03bde268f15 (diff)
Send task events in time-ordered fashion
So far, only the context switches were buffered and then handled in time-ordered fashion. Now we do this also for Command, ThreadStart, ThreadEnd and Lost events. Additionally, we properly handle these events when no more samples are available, which can happen for applications that basically only sleep and don't trigger any significant CPU load. Change-Id: I4b1c8a1cfc91737a75a48f38dba04d6742f7c3a3 Reviewed-by: Milian Wolff <milian.wolff@kdab.com>
Diffstat (limited to 'app')
-rw-r--r--app/perfunwind.cpp109
-rw-r--r--app/perfunwind.h31
2 files changed, 76 insertions, 64 deletions
diff --git a/app/perfunwind.cpp b/app/perfunwind.cpp
index baeea39..5499312 100644
--- a/app/perfunwind.cpp
+++ b/app/perfunwind.cpp
@@ -63,10 +63,10 @@ void PerfUnwind::Stats::finishedRound()
maxSamplesPerRound = std::max(maxSamplesPerRound, numSamplesInRound);
maxMmapsPerRound = std::max(maxMmapsPerRound, numMmapsInRound);
- maxContextSwitchesPerRound = std::max(maxContextSwitchesPerRound, numContextSwitchesInRound);
+ maxTaskEventsPerRound = std::max(maxTaskEventsPerRound, numTaskEventsInRound);
numSamplesInRound = 0;
numMmapsInRound = 0;
- numContextSwitchesInRound = 0;
+ numTaskEventsInRound = 0;
++numRounds;
maxTotalEventSizePerRound = std::max(maxTotalEventSizePerRound,
@@ -149,10 +149,10 @@ PerfUnwind::~PerfUnwind()
out << "mmaps time violations: " << m_stats.numTimeViolatingMmaps << "\n";
out << "max samples per round: " << m_stats.maxSamplesPerRound << "\n";
out << "max mmaps per round: " << m_stats.maxMmapsPerRound << "\n";
- out << "max context switches per round: " << m_stats.maxContextSwitchesPerRound << "\n";
+ out << "max task events per round: " << m_stats.maxTaskEventsPerRound << "\n";
out << "max samples per flush: " << m_stats.maxSamplesPerFlush << "\n";
out << "max mmaps per flush: " << m_stats.maxMmapsPerFlush << "\n";
- out << "max context switches per flush: " << m_stats.maxContextSwitchesPerFlush << "\n";
+ out << "max task events per flush: " << m_stats.maxTaskEventsPerFlush << "\n";
out << "max buffer size: " << m_stats.maxBufferSize << "\n";
out << "max total event size per round: " << m_stats.maxTotalEventSizePerRound << "\n";
out << "max time: " << m_stats.maxTime << "\n";
@@ -221,11 +221,9 @@ void PerfUnwind::sendBuffer(const QByteArray &buffer)
void PerfUnwind::comm(const PerfRecordComm &comm)
{
const qint32 commId = resolveString(comm.comm());
- QByteArray buffer;
- QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(Command)
- << comm.pid() << comm.tid() << comm.time()
- << commId;
- m_auxBuffer.insert(comm.time(), buffer);
+
+ bufferEvent(TaskEvent{comm.pid(), comm.tid(), comm.time(), commId, Command},
+ &m_taskEventsBuffer, &m_stats.numTaskEventsInRound);
}
void PerfUnwind::attr(const PerfRecordAttr &attr)
@@ -301,10 +299,8 @@ void PerfUnwind::sendEventFormat(qint32 id, const EventFormat &format)
void PerfUnwind::lost(const PerfRecordLost &lost)
{
- QByteArray buffer;
- QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(LostDefinition)
- << lost.pid() << lost.tid() << lost.time();
- m_auxBuffer.insert(lost.time(), buffer);
+ bufferEvent(TaskEvent{lost.pid(), lost.tid(), lost.time(), 0, LostDefinition},
+ &m_taskEventsBuffer, &m_stats.numTaskEventsInRound);
}
void PerfUnwind::features(const PerfFeatures &features)
@@ -635,12 +631,6 @@ void PerfUnwind::analyze(const PerfRecordSample &sample)
}
}
- for (auto it = m_auxBuffer.begin();
- it != m_auxBuffer.end() && it.key() < sample.time();
- it = m_auxBuffer.erase(it)) {
- sendBuffer(it.value());
- }
-
QVector<QPair<qint32, quint64>> values;
if (sample.readFormats().isEmpty()) {
values.push_back({ attributesId, sample.period() });
@@ -672,20 +662,14 @@ void PerfUnwind::analyze(const PerfRecordSample &sample)
void PerfUnwind::fork(const PerfRecordFork &sample)
{
- QByteArray buffer;
- QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(ThreadStart)
- << sample.childPid() << sample.childTid()
- << sample.time();
- m_auxBuffer.insert(sample.time(), buffer);
+ bufferEvent(TaskEvent{sample.childPid(), sample.childTid(), sample.time(), 0, ThreadStart},
+ &m_taskEventsBuffer, &m_stats.numTaskEventsInRound);
}
void PerfUnwind::exit(const PerfRecordExit &sample)
{
- QByteArray buffer;
- QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(ThreadEnd)
- << sample.childPid() << sample.childTid()
- << sample.time();
- m_auxBuffer.insert(sample.time(), buffer);
+ bufferEvent(TaskEvent{sample.childPid(), sample.childTid(), sample.time(), 0, ThreadEnd},
+ &m_taskEventsBuffer, &m_stats.numTaskEventsInRound);
}
void PerfUnwind::sendString(qint32 id, const QByteArray& string)
@@ -854,17 +838,20 @@ void PerfUnwind::forwardMmapBuffer(QList<PerfRecordMmap>::Iterator &mmapIt,
}
}
+template<typename T>
+bool sortByTime(const T& lhs, const T& rhs)
+{
+ return lhs.time() < rhs.time();
+}
+
void PerfUnwind::flushEventBuffer(uint desiredBufferSize)
{
- auto sortByTime = [](const PerfRecord &lhs, const PerfRecord &rhs) {
- return lhs.time() < rhs.time();
- };
// stable sort here to keep order of events with the same time
// esp. when we runtime-attach, we will get lots of mmap events with time 0
// which we must not shuffle
- std::stable_sort(m_mmapBuffer.begin(), m_mmapBuffer.end(), sortByTime);
- std::stable_sort(m_sampleBuffer.begin(), m_sampleBuffer.end(), sortByTime);
- std::stable_sort(m_contextSwitchBuffer.begin(), m_contextSwitchBuffer.end(), sortByTime);
+ std::stable_sort(m_mmapBuffer.begin(), m_mmapBuffer.end(), sortByTime<PerfRecord>);
+ std::stable_sort(m_sampleBuffer.begin(), m_sampleBuffer.end(), sortByTime<PerfRecord>);
+ std::stable_sort(m_taskEventsBuffer.begin(), m_taskEventsBuffer.end(), sortByTime<TaskEvent>);
if (m_stats.enabled) {
for (const auto &sample : m_sampleBuffer) {
@@ -901,8 +888,8 @@ void PerfUnwind::flushEventBuffer(uint desiredBufferSize)
uint bufferSize = m_eventBufferSize;
- auto contextSwitchIt = m_contextSwitchBuffer.begin();
- auto contextSwitchEnd = m_contextSwitchBuffer.end();
+ auto taskEventIt = m_taskEventsBuffer.begin();
+ auto taskEventEnd = m_taskEventsBuffer.end();
for (; m_eventBufferSize > desiredBufferSize && sampleIt != sampleEnd; ++sampleIt) {
const quint64 timestamp = sampleIt->time();
@@ -934,18 +921,28 @@ void PerfUnwind::flushEventBuffer(uint desiredBufferSize)
forwardMmapBuffer(mmapIt, mmapEnd, timestamp);
- for (; contextSwitchIt != contextSwitchEnd && contextSwitchIt->time() <= sampleIt->time();
- ++contextSwitchIt) {
+ for (; taskEventIt != taskEventEnd && taskEventIt->time() <= sampleIt->time();
+ ++taskEventIt) {
if (!m_stats.enabled) {
- sendContextSwitch(*contextSwitchIt);
+ sendTaskEvent(*taskEventIt);
}
- m_eventBufferSize -= contextSwitchIt->size();
+ m_eventBufferSize -= taskEventIt->size();
}
analyze(*sampleIt);
m_eventBufferSize -= sampleIt->size();
}
+ // also flush task events after samples got depleted
+ // this ensures we send all of them, even for situations where the client
+ // application is not CPU-heavy but rather sleeps most of the time
+ for (; m_eventBufferSize > desiredBufferSize && taskEventIt != taskEventEnd; ++taskEventIt) {
+ if (!m_stats.enabled) {
+ sendTaskEvent(*taskEventIt);
+ }
+ m_eventBufferSize -= taskEventIt->size();
+ }
+
if (m_stats.enabled) {
++m_stats.numBufferFlushes;
const auto samples = std::distance(m_sampleBuffer.begin(), sampleIt);
@@ -956,15 +953,15 @@ void PerfUnwind::flushEventBuffer(uint desiredBufferSize)
Q_ASSERT(mmaps >= 0 && mmaps < std::numeric_limits<uint>::max());
m_stats.maxMmapsPerFlush = std::max(static_cast<uint>(mmaps),
m_stats.maxMmapsPerFlush);
- const auto contextSwitches = std::distance(m_contextSwitchBuffer.begin(), contextSwitchIt);
- Q_ASSERT(contextSwitches >= 0 && contextSwitches < std::numeric_limits<uint>::max());
- m_stats.maxContextSwitchesPerFlush = std::max(static_cast<uint>(contextSwitches),
- m_stats.maxContextSwitchesPerFlush);
+ const auto taskEvents = std::distance(m_taskEventsBuffer.begin(), taskEventIt);
+ Q_ASSERT(taskEvents >= 0 && taskEvents < std::numeric_limits<uint>::max());
+ m_stats.maxTaskEventsPerFlush = std::max(static_cast<uint>(taskEvents),
+ m_stats.maxTaskEventsPerFlush);
}
m_sampleBuffer.erase(m_sampleBuffer.begin(), sampleIt);
m_mmapBuffer.erase(m_mmapBuffer.begin(), mmapIt);
- m_contextSwitchBuffer.erase(m_contextSwitchBuffer.begin(), contextSwitchIt);
+ m_taskEventsBuffer.erase(m_taskEventsBuffer.begin(), taskEventIt);
if (!violatesTimeOrder)
return;
@@ -992,15 +989,23 @@ void PerfUnwind::flushEventBuffer(uint desiredBufferSize)
void PerfUnwind::contextSwitch(const PerfRecordContextSwitch& contextSwitch)
{
- bufferEvent(contextSwitch, &m_contextSwitchBuffer, &m_stats.numContextSwitchesInRound);
+ bufferEvent(TaskEvent{contextSwitch.pid(), contextSwitch.tid(), contextSwitch.time(),
+ contextSwitch.misc() & PERF_RECORD_MISC_SWITCH_OUT, ContextSwitchDefinition},
+ &m_taskEventsBuffer, &m_stats.numTaskEventsInRound);
}
-void PerfUnwind::sendContextSwitch(const PerfRecordContextSwitch& contextSwitch)
+void PerfUnwind::sendTaskEvent(const TaskEvent& taskEvent)
{
QByteArray buffer;
- QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(ContextSwitchDefinition)
- << contextSwitch.pid() << contextSwitch.tid()
- << contextSwitch.time()
- << bool(contextSwitch.misc() & PERF_RECORD_MISC_SWITCH_OUT);
+ QDataStream stream(&buffer, QIODevice::WriteOnly);
+ stream << static_cast<quint8>(taskEvent.m_type)
+ << taskEvent.m_pid << taskEvent.m_tid
+ << taskEvent.m_time;
+
+ if (taskEvent.m_type == ContextSwitchDefinition)
+ stream << static_cast<bool>(taskEvent.m_payload);
+ else if (taskEvent.m_type == Command)
+ stream << taskEvent.m_payload;
+
sendBuffer(buffer);
}
diff --git a/app/perfunwind.h b/app/perfunwind.h
index 45d1567..4096d77 100644
--- a/app/perfunwind.h
+++ b/app/perfunwind.h
@@ -110,9 +110,9 @@ public:
Stats()
: numSamples(0), numMmaps(0), numRounds(0), numBufferFlushes(0),
numTimeViolatingSamples(0), numTimeViolatingMmaps(0),
- numSamplesInRound(0), numMmapsInRound(0), numContextSwitchesInRound(0),
- maxSamplesPerRound(0), maxMmapsPerRound(0), maxContextSwitchesPerRound(0),
- maxSamplesPerFlush(0), maxMmapsPerFlush(0), maxContextSwitchesPerFlush(0),
+ numSamplesInRound(0), numMmapsInRound(0), numTaskEventsInRound(0),
+ maxSamplesPerRound(0), maxMmapsPerRound(0), maxTaskEventsPerRound(0),
+ maxSamplesPerFlush(0), maxMmapsPerFlush(0), maxTaskEventsPerFlush(0),
maxBufferSize(0), maxTotalEventSizePerRound(0),
maxTime(0), maxTimeBetweenRounds(0), maxReorderTime(0),
lastRoundTime(0), totalEventSizePerRound(0),
@@ -130,13 +130,13 @@ public:
quint64 numTimeViolatingMmaps;
uint numSamplesInRound;
uint numMmapsInRound;
- uint numContextSwitchesInRound;
+ uint numTaskEventsInRound;
uint maxSamplesPerRound;
uint maxMmapsPerRound;
- uint maxContextSwitchesPerRound;
+ uint maxTaskEventsPerRound;
uint maxSamplesPerFlush;
uint maxMmapsPerFlush;
- uint maxContextSwitchesPerFlush;
+ uint maxTaskEventsPerFlush;
uint maxBufferSize;
uint maxTotalEventSizePerRound;
quint64 maxTime;
@@ -231,9 +231,6 @@ public:
{
finishedRound();
flushEventBuffer(0);
- for (const QByteArray &aux : qAsConst(m_auxBuffer))
- sendBuffer(aux);
- m_auxBuffer.clear();
}
private:
@@ -277,10 +274,20 @@ private:
QString m_kallsymsPath;
bool m_ignoreKallsymsBuildId;
- QMultiMap<quint64, QByteArray> m_auxBuffer;
QList<PerfRecordSample> m_sampleBuffer;
QList<PerfRecordMmap> m_mmapBuffer;
- QList<PerfRecordContextSwitch> m_contextSwitchBuffer;
+ struct TaskEvent
+ {
+ qint32 m_pid;
+ qint32 m_tid;
+ quint64 m_time;
+ qint32 m_payload;
+ EventType m_type;
+
+ quint64 time() const { return m_time; }
+ quint64 size() const { return sizeof(TaskEvent); }
+ };
+ QList<TaskEvent> m_taskEventsBuffer;
QHash<qint32, PerfSymbolTable *> m_symbolTables;
PerfKallsyms m_kallsyms;
PerfTracingData m_tracingData;
@@ -313,7 +320,7 @@ private:
void sendSymbol(qint32 id, const Symbol &symbol);
void sendAttributes(qint32 id, const PerfEventAttributes &attributes, const QByteArray &name);
void sendEventFormat(qint32 id, const EventFormat &format);
- void sendContextSwitch(const PerfRecordContextSwitch &contextSwitch);
+ void sendTaskEvent(const TaskEvent &taskEvent);
template<typename Event>
void bufferEvent(const Event &event, QList<Event> *buffer, uint *eventCounter);