diff options
author | Milian Wolff <milian.wolff@kdab.com> | 2018-10-29 11:10:43 +0100 |
---|---|---|
committer | Ulf Hermann <ulf.hermann@qt.io> | 2019-05-03 12:44:42 +0000 |
commit | a4a0b030500ff568aa4faf45ca10f53db3aeb242 (patch) | |
tree | 77d1bee7f0909b9a332fd9743a7803de91b617b8 | |
parent | f6daea1a9b3afdcb7c5ae61eeac2ac20e8c88135 (diff) |
Also resolve callchain stored in branch stack, if available
When a branch stack is available (i.e. `perf record --call-graph lbr`),
then resolve the callchain stored therein. In such cases, the
fp callchain can potentially contain kernel frames, so still
resolve those but skip any user frames therein.
The branch stack then contains the user frames: The "to"
register is the callee, the "from" register is the caller. That
means the callchain can be built by combining the first entry's
"to" register (the tail) with all "from" registers. See also
`callchain__lbr_callstack_printf` in perf's `session.c` for more
information.
Change-Id: I0e060e158859eac6c130c073255af87c365679bf
Reviewed-by: Milian Wolff <milian.wolff@kdab.com>
-rw-r--r-- | app/perfdata.h | 5 | ||||
-rw-r--r-- | app/perfunwind.cpp | 54 |
2 files changed, 43 insertions, 16 deletions
diff --git a/app/perfdata.h b/app/perfdata.h index d841843..1096314 100644 --- a/app/perfdata.h +++ b/app/perfdata.h @@ -408,8 +408,6 @@ public: QList<ReadFormat> readFormats() const { return m_readFormats; } -private: - struct BranchFlags { quint64 mispred: 1; quint64 predicted: 1; @@ -425,6 +423,9 @@ private: quint64 to; BranchFlags flags; }; + const QList<BranchEntry> &branchStack() const { return m_branchStack; } + +private: quint64 m_readFormat; quint64 m_registerMask; diff --git a/app/perfunwind.cpp b/app/perfunwind.cpp index 09aed4f..7a33f8f 100644 --- a/app/perfunwind.cpp +++ b/app/perfunwind.cpp @@ -446,8 +446,21 @@ void PerfUnwind::resolveCallchain() bool isKernel = false; bool addedUserFrames = false; PerfSymbolTable *symbols = symbolTable(m_currentUnwind.sample->pid()); - for (int i = 0; i < m_currentUnwind.sample->callchain().length(); ++i) { + + auto reportIp = [&](quint64 ip) -> bool { + symbols->attachDwfl(&m_currentUnwind); + m_currentUnwind.frames.append(symbols->lookupFrame(ip, isKernel, + &m_currentUnwind.isInterworking)); + return !symbols->cacheIsDirty(); + }; + + // when we have a non-empty branch stack, we need to skip any non-kernel IPs + // in the normal callchain. The branch stack contains the non-kernel IPs then. + const bool hasBranchStack = !m_currentUnwind.sample->branchStack().isEmpty(); + + for (int i = 0, c = m_currentUnwind.sample->callchain().size(); i < c; ++i) { quint64 ip = m_currentUnwind.sample->callchain()[i]; + if (ip > PERF_CONTEXT_MAX) { switch (ip) { case PERF_CONTEXT_HV: // hypervisor @@ -464,28 +477,41 @@ void PerfUnwind::resolveCallchain() } break; default: - qWarning() << "invalid callchain context" << ip; + qWarning() << "invalid callchain context" << hex << ip; return; } } else { + // prefer user frames from branch stack if available + if (hasBranchStack && !isKernel) + break; + + // sometimes it skips the first user frame. 
if (!addedUserFrames && !isKernel && ip != m_currentUnwind.sample->ip()) { - // sometimes it skips the first user frame. - symbols->attachDwfl(&m_currentUnwind); - m_currentUnwind.frames.append(symbols->lookupFrame( - m_currentUnwind.sample->ip(), false, - &m_currentUnwind.isInterworking)); + if (!reportIp(m_currentUnwind.sample->ip())) + return; } - symbols->attachDwfl(&m_currentUnwind); - m_currentUnwind.frames.append(symbols->lookupFrame( - ip, isKernel, - &m_currentUnwind.isInterworking)); + if (!reportIp(ip)) + return; + if (!isKernel) addedUserFrames = true; } + } + + // when we are still in the kernel, we cannot have a meaningful branch stack + if (isKernel) + return; - if (symbols->cacheIsDirty()) - break; + // if available, also resolve the callchain stored in the branch stack: + // caller is stored in "from", callee is stored in "to" + // so the branch is made up of the first callee and all callers + for (int i = 0, c = m_currentUnwind.sample->branchStack().size(); i < c; ++i) { + const auto& entry = m_currentUnwind.sample->branchStack()[i]; + if (i == 0 && !reportIp(entry.to)) + return; + if (!reportIp(entry.from)) + return; } } @@ -574,7 +600,7 @@ void PerfUnwind::analyze(const PerfRecordSample &sample) m_currentUnwind.frames.clear(); userSymbols->updatePerfMap(); - if (sample.callchain().length() > 0) + if (!sample.callchain().isEmpty() || !sample.branchStack().isEmpty()) resolveCallchain(); bool userDirty = userSymbols->cacheIsDirty(); |