summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMilian Wolff <milian.wolff@kdab.com>2018-10-29 11:10:43 +0100
committerUlf Hermann <ulf.hermann@qt.io>2019-05-03 12:44:42 +0000
commita4a0b030500ff568aa4faf45ca10f53db3aeb242 (patch)
tree77d1bee7f0909b9a332fd9743a7803de91b617b8
parentf6daea1a9b3afdcb7c5ae61eeac2ac20e8c88135 (diff)
Also resolve callchain stored in branch stack, if available
When a branch stack is available (i.e. `perf record -g lbr`), then resolve the callchain stored therein. In such cases, the fp callchain can potentially contain kernel frames, so still resolve those but skip any user frames therein. The branch stack then contains the user frames: The "to" register is the callee, the "from" register is the caller. That means the callchain can be built by combining the first entry's "to" register (the tail), with all "from" registers. See also `callchain__lbr_callstack_printf` in perf's `session.c` for more information. Change-Id: I0e060e158859eac6c130c073255af87c365679bf Reviewed-by: Milian Wolff <milian.wolff@kdab.com>
-rw-r--r--app/perfdata.h5
-rw-r--r--app/perfunwind.cpp54
2 files changed, 43 insertions, 16 deletions
diff --git a/app/perfdata.h b/app/perfdata.h
index d841843..1096314 100644
--- a/app/perfdata.h
+++ b/app/perfdata.h
@@ -408,8 +408,6 @@ public:
QList<ReadFormat> readFormats() const { return m_readFormats; }
-private:
-
struct BranchFlags {
quint64 mispred: 1;
quint64 predicted: 1;
@@ -425,6 +423,9 @@ private:
quint64 to;
BranchFlags flags;
};
+ const QList<BranchEntry> &branchStack() const { return m_branchStack; }
+
+private:
quint64 m_readFormat;
quint64 m_registerMask;
diff --git a/app/perfunwind.cpp b/app/perfunwind.cpp
index 09aed4f..7a33f8f 100644
--- a/app/perfunwind.cpp
+++ b/app/perfunwind.cpp
@@ -446,8 +446,21 @@ void PerfUnwind::resolveCallchain()
bool isKernel = false;
bool addedUserFrames = false;
PerfSymbolTable *symbols = symbolTable(m_currentUnwind.sample->pid());
- for (int i = 0; i < m_currentUnwind.sample->callchain().length(); ++i) {
+
+ auto reportIp = [&](quint64 ip) -> bool {
+ symbols->attachDwfl(&m_currentUnwind);
+ m_currentUnwind.frames.append(symbols->lookupFrame(ip, isKernel,
+ &m_currentUnwind.isInterworking));
+ return !symbols->cacheIsDirty();
+ };
+
+ // when we have a non-empty branch stack, we need to skip any non-kernel IPs
+ // in the normal callchain. The branch stack contains the non-kernel IPs then.
+ const bool hasBranchStack = !m_currentUnwind.sample->branchStack().isEmpty();
+
+ for (int i = 0, c = m_currentUnwind.sample->callchain().size(); i < c; ++i) {
quint64 ip = m_currentUnwind.sample->callchain()[i];
+
if (ip > PERF_CONTEXT_MAX) {
switch (ip) {
case PERF_CONTEXT_HV: // hypervisor
@@ -464,28 +477,41 @@ void PerfUnwind::resolveCallchain()
}
break;
default:
- qWarning() << "invalid callchain context" << ip;
+ qWarning() << "invalid callchain context" << hex << ip;
return;
}
} else {
+ // prefer user frames from branch stack if available
+ if (hasBranchStack && !isKernel)
+ break;
+
+ // sometimes it skips the first user frame.
if (!addedUserFrames && !isKernel && ip != m_currentUnwind.sample->ip()) {
- // sometimes it skips the first user frame.
- symbols->attachDwfl(&m_currentUnwind);
- m_currentUnwind.frames.append(symbols->lookupFrame(
- m_currentUnwind.sample->ip(), false,
- &m_currentUnwind.isInterworking));
+ if (!reportIp(m_currentUnwind.sample->ip()))
+ return;
}
- symbols->attachDwfl(&m_currentUnwind);
- m_currentUnwind.frames.append(symbols->lookupFrame(
- ip, isKernel,
- &m_currentUnwind.isInterworking));
+ if (!reportIp(ip))
+ return;
+
if (!isKernel)
addedUserFrames = true;
}
+ }
+
+ // when we are still in the kernel, we cannot have a meaningful branch stack
+ if (isKernel)
+ return;
- if (symbols->cacheIsDirty())
- break;
+ // if available, also resolve the callchain stored in the branch stack:
+ // caller is stored in "from", callee is stored in "to"
+ // so the branch is made up of the first callee and all callers
+ for (int i = 0, c = m_currentUnwind.sample->branchStack().size(); i < c; ++i) {
+ const auto& entry = m_currentUnwind.sample->branchStack()[i];
+ if (i == 0 && !reportIp(entry.to))
+ return;
+ if (!reportIp(entry.from))
+ return;
}
}
@@ -574,7 +600,7 @@ void PerfUnwind::analyze(const PerfRecordSample &sample)
m_currentUnwind.frames.clear();
userSymbols->updatePerfMap();
- if (sample.callchain().length() > 0)
+ if (!sample.callchain().isEmpty() || !sample.branchStack().isEmpty())
resolveCallchain();
bool userDirty = userSymbols->cacheIsDirty();