summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ulf Hermann <ulf.hermann@theqtcompany.com> 2016-03-17 12:22:24 +0100
committer: Ulf Hermann <ulf.hermann@theqtcompany.com> 2016-03-22 14:59:56 +0000
commit: cd37b339b6562ab67e5cc91009f603569599b177 (patch)
tree: 715aa5e861fa96be95a6773935b0923fa485645b
parent: 99c95d84956aba5295e61a814f6cb0c87bf69b52 (diff)
Avoid redundant symbol table lookups (tags: v4.0.1, v4.0.0-rc1, v4.0.0-beta1, v4.0.0)
Caching the results of symbol table lookups makes the application run 100x faster, literally.

Change-Id: I2a93db66108452d29cbd1fd9faa2ba0b7def222b
Task-number: QCE-69
Reviewed-by: Pasi Petäjäjärvi <pasi.petajajarvi@theqtcompany.com>
Reviewed-by: Joerg Bornemann <joerg.bornemann@theqtcompany.com>
-rw-r--r-- app/perfunwind.cpp 74
-rw-r--r-- app/perfunwind.h 6
2 files changed, 44 insertions, 36 deletions
diff --git a/app/perfunwind.cpp b/app/perfunwind.cpp
index ae13b92..4170a8d 100644
--- a/app/perfunwind.cpp
+++ b/app/perfunwind.cpp
@@ -318,38 +318,46 @@ static const Dwfl_Thread_Callbacks callbacks = {
nextThread, NULL, memoryRead, setInitialRegisters, NULL, NULL
};
-static PerfUnwind::Frame lookupSymbol(PerfUnwind::UnwindInfo *ui, Dwfl *dwfl, Dwarf_Addr ip,
- bool isKernel)
+PerfUnwind::Frame PerfUnwind::lookupSymbol(Dwarf_Addr ip, bool isKernel)
{
+ quint32 pid = currentUnwind.sample->pid();
Dwfl_Module *mod = dwfl ? dwfl_addrmodule(dwfl, ip) : 0;
- QByteArray symname;
QByteArray elfFile;
- QByteArray srcFile;
- int line = 0;
- int column = 0;
- GElf_Sym sym;
- GElf_Off off = 0;
if (dwfl && !mod) {
- const PerfUnwind::ElfInfo *elfInfo = 0;
- mod = ui->unwind->reportElf(ip, isKernel ? PerfUnwind::s_kernelPid : ui->sample->pid(),
- &elfInfo);
+ const ElfInfo *elfInfo = 0;
+ mod = reportElf(ip, isKernel ? s_kernelPid : pid, &elfInfo);
if (!mod && elfInfo)
elfFile = elfInfo->file.fileName().toLocal8Bit();
}
- Dwarf_Addr adjusted = (ui->unwind->architecture() != PerfRegisterInfo::ARCH_ARM || (ip & 1)) ?
- ip : ip + 1;
+ Dwarf_Addr adjusted = (architecture() != PerfRegisterInfo::ARCH_ARM || (ip & 1)) ? ip : ip + 1;
+ if (mod)
+ elfFile = dwfl_module_info(mod, 0, 0, 0, 0, 0, 0, 0);
+
+ QHash<Dwarf_Addr, Frame> &processAddrCache = addrCache[pid];
+ auto it = processAddrCache.constFind(ip);
+ // Check for elfFile as it might have loaded a different file to the same address in the mean
+ // time. We don't consider the case of loading the same file again at a different, overlapping
+ // offset.
+ if (it != processAddrCache.constEnd() && it->elfFile == elfFile)
+ return *it;
+
+ QByteArray symname;
+ QByteArray srcFile;
+ int line = 0;
+ int column = 0;
+ GElf_Sym sym;
+ GElf_Off off = 0;
+
if (mod) {
// For addrinfo we need the raw pointer into symtab, so we need to adjust ourselves.
symname = dwfl_module_addrinfo(mod, adjusted, &off, &sym, 0, 0, 0);
if (off == adjusted) // no symbol found
off = 0;
- else if (ui->unwind->granularity() == PerfUnwind::Function)
+ else if (granularity() == Function)
adjusted -= off;
- elfFile = dwfl_module_info(mod, 0, 0, 0, 0, 0, 0, 0);
-
Dwfl_Line *srcLine = dwfl_module_getsrc(mod, adjusted);
if (srcLine)
srcFile = dwfl_lineinfo(srcLine, NULL, &line, &column, NULL, NULL);
@@ -360,20 +368,23 @@ static PerfUnwind::Frame lookupSymbol(PerfUnwind::UnwindInfo *ui, Dwfl *dwfl, Dw
int status = -1;
if (symname[0] == '_' && symname[1] == 'Z')
demangled = abi::__cxa_demangle(symname, 0, 0, &status);
- else if (ui->unwind->architecture() == PerfRegisterInfo::ARCH_ARM && symname[0] == '$'
+ else if (architecture() == PerfRegisterInfo::ARCH_ARM && symname[0] == '$'
&& (symname[1] == 'a' || symname[1] == 't') && symname[2] == '\0')
- ui->isInterworking = true;
+ currentUnwind.isInterworking = true;
// Adjust it back. The symtab entries are 1 off for all practical purposes.
- PerfUnwind::Frame frame(adjusted, isKernel, status == 0 ? QByteArray(demangled) : symname,
- elfFile, srcFile, line, column);
+ Frame frame(adjusted, isKernel, status == 0 ? QByteArray(demangled) : symname, elfFile,
+ srcFile, line, column);
free(demangled);
+ processAddrCache.insert(ip, frame);
return frame;
} else {
- symname = ui->unwind->symbolFromPerfMap(adjusted, ui->sample->pid(), &off);
- if (ui->unwind->granularity() == PerfUnwind::Function)
+ symname = symbolFromPerfMap(adjusted, pid, &off);
+ if (granularity() == Function)
adjusted -= off;
- return PerfUnwind::Frame(adjusted, isKernel, symname, elfFile, srcFile, line, column);
+ Frame frame(adjusted, isKernel, symname, elfFile, srcFile, line, column);
+ processAddrCache.insert(ip, frame);
+ return frame;
}
}
@@ -392,12 +403,8 @@ static int frameCallback(Dwfl_Frame *state, void *arg)
Dwarf_Addr pc_adjusted = pc - (isactivation ? 0 : 1);
- /* Get PC->SYMNAME. */
- Dwfl_Thread *thread = dwfl_frame_thread (state);
- Dwfl *dwfl = dwfl_thread_dwfl (thread);
-
// isKernel = false as unwinding generally only works on user code
- ui->frames.append(lookupSymbol(ui, dwfl, pc_adjusted, false));
+ ui->frames.append(ui->unwind->lookupSymbol(pc_adjusted, false));
return DWARF_CB_OK;
}
@@ -438,11 +445,10 @@ void PerfUnwind::resolveCallchain()
// sometimes it skips the first user frame.
if (i == 0 && !isKernel && ip != currentUnwind.sample->ip())
- currentUnwind.frames.append(lookupSymbol(&currentUnwind, dwfl,
- currentUnwind.sample->ip(), false));
+ currentUnwind.frames.append(lookupSymbol(currentUnwind.sample->ip(), false));
if (ip <= PERF_CONTEXT_MAX)
- currentUnwind.frames.append(lookupSymbol(&currentUnwind, dwfl, ip, isKernel));
+ currentUnwind.frames.append(lookupSymbol(ip, isKernel));
}
}
@@ -491,10 +497,8 @@ void PerfUnwind::analyze(const PerfRecordSample &sample)
unwindStack();
}
// If nothing was found, at least look up the IP
- if (currentUnwind.frames.isEmpty()) {
- currentUnwind.frames.append(lookupSymbol(&currentUnwind, dwfl, sample.ip(),
- ipIsInKernelSpace(sample.ip())));
- }
+ if (currentUnwind.frames.isEmpty())
+ currentUnwind.frames.append(lookupSymbol(sample.ip(), ipIsInKernelSpace(sample.ip())));
QByteArray buffer;
QDataStream(&buffer, QIODevice::WriteOnly)
diff --git a/app/perfunwind.h b/app/perfunwind.h
index f7a3520..2b07131 100644
--- a/app/perfunwind.h
+++ b/app/perfunwind.h
@@ -64,7 +64,7 @@ public:
struct UnwindInfo {
UnwindInfo() : frames(0), unwind(0), sample(0), broken(false), isInterworking(false) {}
QVector<PerfUnwind::Frame> frames;
- const PerfUnwind *unwind;
+ PerfUnwind *unwind;
const PerfRecordSample *sample;
bool broken;
bool isInterworking;
@@ -120,6 +120,8 @@ public:
QByteArray symbolFromPerfMap(quint64 ip, quint32 pid, GElf_Off *offset) const;
void updatePerfMap(quint32 pid);
+ Frame lookupSymbol(Dwarf_Addr ip, bool isKernel);
+
private:
enum CallchainContext {
@@ -159,6 +161,8 @@ private:
QHash<quint32, QMap<quint64, ElfInfo> > elfs; // The inner map needs to be sorted
QHash<quint32, PerfMap> perfMaps;
QList<PerfRecordSample> sampleBuffer;
+ QHash<quint32, QHash<Dwarf_Addr, Frame> > addrCache;
+
uint sampleBufferSize;
Granularity sampleGranularity;