summaryrefslogtreecommitdiffstats
path: root/app/perfunwind.cpp
blob: 8dfcc7f88b35ed88977ca900fe957e42f4bf6f2f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
/****************************************************************************
**
** Copyright (C) 2015 The Qt Company Ltd
** All rights reserved.
** For any questions to The Qt Company, please use contact form at http://www.qt.io/contact-us
**
** This file is part of the Qt Enterprise Perf Profiler Add-on.
**
** GNU General Public License Usage
** This file may be used under the terms of the GNU General Public License
** version 3 as published by the Free Software Foundation and appearing in
** the file LICENSE.GPLv3 included in the packaging of this file. Please
** review the following information to ensure the GNU General Public License
** requirements will be met: https://www.gnu.org/licenses/gpl.html.
**
** If you have questions regarding the use of this file, please use
** contact form at http://www.qt.io/contact-us
**
****************************************************************************/

#include "perfunwind.h"
#include "perfregisterinfo.h"
#include "perfsymboltable.h"

#include <QDebug>
#include <QtEndian>

#include <cstring>

// Hash function for PerfUnwind::Location, so that it can be used as a hash key.
// Folds address, file, pid, line and column (in that order) into the given seed.
// parentLocationId does not take part in the hash, matching operator== below.
uint qHash(const PerfUnwind::Location &location, uint seed)
{
    QtPrivate::QHashCombine combine;
    uint result = combine(seed, location.address);
    result = combine(result, location.file);
    result = combine(result, location.pid);
    result = combine(result, location.line);
    result = combine(result, location.column);
    return result;
}

// Two locations are equal if address, file, pid, line and column all match.
// parentLocationId is deliberately not compared here, just as it is not hashed in qHash().
bool operator==(const PerfUnwind::Location &a, const PerfUnwind::Location &b)
{
    const bool sameOrigin = a.address == b.address && a.file == b.file && a.pid == b.pid;
    if (!sameOrigin)
        return false;

    return a.line == b.line && a.column == b.column;
}

// Stores the output device and the various search paths, fills in the libdwfl offline
// callbacks and immediately writes the stream header (magic + data stream version) to output.
PerfUnwind::PerfUnwind(QIODevice *output, const QString &systemRoot, const QString &debugPath,
                       const QString &extraLibsPath, const QString &appPath,
                       const QString &kallsymsPath) :
    m_output(output),
    m_architecture(PerfRegisterInfo::ARCH_INVALID),
    m_systemRoot(systemRoot),
    m_extraLibsPath(extraLibsPath),
    m_appPath(appPath),
    m_kallsyms(kallsymsPath),
    m_sampleBufferSize(0)
{
    m_currentUnwind.unwind = this;

    // Build the debug info search path: every entry, including the first one, is preceded
    // by a colon.
    const QChar separator = QLatin1Char(':');
    const QString searchPath = separator + debugPath + separator + appPath + separator
            + extraLibsPath + separator + systemRoot;
    const QByteArray encodedPath = searchPath.toUtf8();
    const int encodedLength = encodedPath.length();

    // Keep a null-terminated copy of the path; it is released with delete[] in the destructor.
    m_debugInfoPath = new char[encodedLength + 1];
    std::memcpy(m_debugInfoPath, encodedPath.constData(), encodedLength);
    m_debugInfoPath[encodedLength] = 0;

    m_offlineCallbacks.find_elf = dwfl_build_id_find_elf;
    m_offlineCallbacks.find_debuginfo = dwfl_standard_find_debuginfo;
    m_offlineCallbacks.section_address = dwfl_offline_section_address;
    // The callbacks hold the address of our pointer, not a copy of the string.
    m_offlineCallbacks.debuginfo_path = &m_debugInfoPath;

    // Write minimal header, consisting of magic and data stream version we're going to use.
    // sizeof(magic) covers the terminating null byte, so that is written as well.
    const char magic[] = "QPERFSTREAM";
    output->write(magic, sizeof(magic));

    const qint32 dataStreamVersion = qToLittleEndian(QDataStream::Qt_DefaultCompiledVersion);
    output->write(reinterpret_cast<const char *>(&dataStreamVersion),
                  sizeof(dataStreamVersion));
}

// Analyzes all samples that are still waiting in the sample buffer, then releases the debug
// info path string and the symbol tables owned by this object.
PerfUnwind::~PerfUnwind()
{
    // Iterate through a const reference; analyze() does not touch the buffer itself.
    const auto &pendingSamples = m_sampleBuffer;
    for (const PerfRecordSample &pending : pendingSamples)
        analyze(pending);

    delete[] m_debugInfoPath;
    qDeleteAll(m_symbolTables);
}

// Returns the symbol table for the given pid, creating and registering it on first use.
// The tables are owned by this object and deleted in the destructor.
PerfSymbolTable *PerfUnwind::symbolTable(quint32 pid)
{
    // operator[] inserts a null entry if the pid is not known yet.
    PerfSymbolTable *&slot = m_symbolTables[pid];
    if (slot)
        return slot;

    slot = new PerfSymbolTable(pid, &m_offlineCallbacks, this);
    return slot;
}

// Returns the Dwfl handle the symbol table of the given pid attaches for the given timestamp,
// using the current unwind state.
Dwfl *PerfUnwind::dwfl(quint32 pid, quint64 timestamp)
{
    PerfSymbolTable *table = symbolTable(pid);
    return table->attachDwfl(timestamp, &m_currentUnwind);
}

void PerfUnwind::registerElf(const PerfRecordMmap &mmap)
{
    symbolTable(mmap.pid())->registerElf(mmap, m_appPath, m_systemRoot, m_extraLibsPath);
}

void PerfUnwind::sendBuffer(const QByteArray &buffer)
{
    quint32 size = qToLittleEndian(buffer.length());
    m_output->write(reinterpret_cast<char *>(&size), sizeof(quint32));
    m_output->write(buffer);
}

void PerfUnwind::comm(const PerfRecordComm &comm)
{
    const qint32 commId = resolveString(comm.comm());
    QByteArray buffer;
    QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(Command)
                                               << comm.pid() << comm.tid()  << comm.time()
                                               << commId;
    sendBuffer(buffer);
}

// Registers the event attributes carried by an attr record, together with their name and
// the ids listed in the record.
void PerfUnwind::attr(const PerfRecordAttr &attr)
{
    const auto &attributes = attr.attr();
    addAttributes(attributes, attributes.name(), attr.ids());
}

void PerfUnwind::addAttributes(const PerfEventAttributes &attributes, const QByteArray &name,
                              const QList<quint64> &ids)
{
    const qint32 internalId = resolveAttributes(attributes, name);

    if (ids.isEmpty()) {
        // If we only get one attribute, it doesn't have an ID.
        // The default ID for samples is 0, so we assign that here,
        // in order to look it up in analyze().
        m_attributeIds[0] = internalId;
    } else {
        foreach (quint64 id, ids)
            m_attributeIds[id] = internalId;
    }
}

// Returns the internal id of the given attributes. Attributes seen for the first time get
// the next free id (the current number of known attributes) and are sent to the output.
qint32 PerfUnwind::resolveAttributes(const PerfEventAttributes &attributes, const QByteArray &name)
{
    const auto known = m_attributes.constFind(attributes);
    if (known != m_attributes.constEnd())
        return known.value();

    const qint32 newId = m_attributes.size();
    m_attributes.insert(attributes, newId);
    sendAttributes(newId, attributes, name);
    return newId;
}

// Sends an AttributesDefinition message: the internal id, the type and config of the
// attributes and the string id of their name. The name is resolved first.
void PerfUnwind::sendAttributes(qint32 id, const PerfEventAttributes &attributes, const QByteArray &name)
{
    const qint32 attrNameId = resolveString(name);

    QByteArray buffer;
    {
        // The stream is scoped so that it is gone before the buffer is sent.
        QDataStream stream(&buffer, QIODevice::WriteOnly);
        stream << static_cast<quint8>(AttributesDefinition);
        stream << id << attributes.type() << attributes.config();
        stream << attrNameId;
    }
    sendBuffer(buffer);
}

void PerfUnwind::lost(const PerfRecordLost &lost)
{
    QByteArray buffer;
    QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(LostDefinition)
                                               << lost.pid() << lost.tid() << lost.time();
    sendBuffer(buffer);
}

void PerfUnwind::features(const PerfFeatures &features)
{
    const auto &eventDescs = features.eventDesc().eventDescs;
    for (const auto &desc : eventDescs)
        addAttributes(desc.attrs, desc.name, desc.ids);

    QByteArray buffer;
    QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(FeaturesDefinition)
                                               << features.hostName()
                                               << features.osRelease()
                                               << features.version()
                                               << features.architecture()
                                               << features.nrCpus()
                                               << features.cpuDesc()
                                               << features.cpuId()
                                               << features.totalMem()
                                               << features.cmdline()
                                               << features.buildId()
                                               << features.cpuTopology()
                                               << features.numaTopology()
                                               << features.branchStack()
                                               << features.pmuMappings()
                                               << features.groupDesc();
    sendBuffer(buffer);
}

// Looks up the ELF the symbol table of the given pid finds for ip at the given timestamp
// and has the same symbol table report it, returning the resulting module.
Dwfl_Module *PerfUnwind::reportElf(quint64 ip, quint32 pid, quint64 timestamp)
{
    PerfSymbolTable *const table = symbolTable(pid);
    return table->reportElf(table->findElf(ip, timestamp));
}

// Returns true if a symbol table for the kernel pid exists and reports that it contains
// the given address. Does not create the kernel symbol table if it is missing.
bool PerfUnwind::ipIsInKernelSpace(quint64 ip) const
{
    const auto kernelTable = m_symbolTables.constFind(quint32(s_kernelPid));
    const bool haveKernelTable = (kernelTable != m_symbolTables.constEnd());
    return haveKernelTable && kernelTable.value()->containsAddress(ip);
}

// Serializes a location: address, file, pid, line, column and parentLocationId, in that order.
QDataStream &operator<<(QDataStream &stream, const PerfUnwind::Location &location)
{
    stream << location.address << location.file << location.pid;
    stream << location.line << location.column;
    stream << location.parentLocationId;
    return stream;
}

// Serializes a symbol: name, binary and the isKernel flag, in that order.
QDataStream &operator<<(QDataStream &stream, const PerfUnwind::Symbol &symbol)
{
    stream << symbol.name;
    stream << symbol.binary;
    stream << symbol.isKernel;
    return stream;
}

// Callback passed to dwfl_getthread_frames(); arg is the PerfUnwind::UnwindInfo being filled.
// Appends the frame for the current pc to the unwind info, or aborts the unwinding if the pc
// cannot be determined, is 0, or more than s_maxFrames frames have been collected already.
static int frameCallback(Dwfl_Frame *state, void *arg)
{
    PerfUnwind::UnwindInfo *info = static_cast<PerfUnwind::UnwindInfo *>(arg);

    Dwarf_Addr pc = 0;
    bool isActivation = false;
    const bool pcKnown = dwfl_frame_pc(state, &pc, &isActivation);
    const bool tooManyFrames = info->frames.length() > PerfUnwind::s_maxFrames;

    if (!pcKnown || tooManyFrames || pc == 0) {
        // Remember how many frames we had when the unwinding stopped.
        info->firstGuessedFrame = info->frames.length();
        qWarning() << dwfl_errmsg(dwfl_errno()) << info->firstGuessedFrame;
        return DWARF_CB_ABORT;
    }

    // Unless the frame is an activation, look up the address just before the pc.
    Dwarf_Addr lookupAddress = pc;
    if (!isActivation)
        lookupAddress -= 1;

    // isKernel = false as unwinding generally only works on user code
    bool isInterworking = false;
    PerfSymbolTable *table = info->unwind->symbolTable(info->sample->pid());
    info->frames.append(table->lookupFrame(lookupAddress, info->sample->time(), false,
                                           &isInterworking));

    // Only an interworking hit on the very first frame triggers the retry in unwindStack().
    if (isInterworking && info->frames.length() == 1)
        info->isInterworking = true;

    return DWARF_CB_OK;
}

// Unwinds the stack of the current sample using the given Dwfl handle; frameCallback()
// collects the frames into m_currentUnwind. If the first frame was flagged as an ARM
// interworking veneer, the unwinding is retried once and the longer of the two results is kept.
void PerfUnwind::unwindStack(Dwfl *dwfl)
{
    dwfl_getthread_frames(dwfl, m_currentUnwind.sample->pid(), frameCallback, &m_currentUnwind);
    if (m_currentUnwind.isInterworking) {
        QVector<qint32> savedFrames = m_currentUnwind.frames;

        // frameCallback() overwrites firstGuessedFrame when an unwinding pass aborts. The value
        // belongs to the frames of the pass that set it, so remember it along with the frames.
        const auto savedFirstGuessedFrame = m_currentUnwind.firstGuessedFrame;

        // If it's an ARM interworking veneer, we assume that we can find a return address in LR and
        // no stack has been used for the veneer itself.
        // The reasoning is that any symbol jumped to by the veneer has to work with or without
        // using the veneer. It needs a valid return address and when it returns the stack pointer
        // must be the same in both cases. Thus, the veneer cannot touch the stack pointer and there
        // has to be a return address in LR, provided by the caller.
        // So, just try again, and make setInitialRegisters use LR for IP.
        m_currentUnwind.frames.resize(1); // Keep the actual veneer frame
        dwfl_getthread_frames(dwfl, m_currentUnwind.sample->pid(), frameCallback, &m_currentUnwind);

        // If the LR trick didn't result in a longer stack trace than the regular unwinding, just
        // revert it. This includes firstGuessedFrame: otherwise analyze() would compute the number
        // of guessed frames from the restored frames and the index of the discarded second pass.
        if (savedFrames.length() > m_currentUnwind.frames.length()) {
            m_currentUnwind.frames.swap(savedFrames);
            m_currentUnwind.firstGuessedFrame = savedFirstGuessedFrame;
        }
    }
}

void PerfUnwind::resolveCallchain()
{
    bool isKernel = false;
    PerfSymbolTable *symbols = symbolTable(m_currentUnwind.sample->pid());
    for (int i = 0; i < m_currentUnwind.sample->callchain().length(); ++i) {
        quint64 ip = m_currentUnwind.sample->callchain()[i];
        if (ip > PERF_CONTEXT_MAX) {
            switch (ip) {
            case PERF_CONTEXT_HV: // hypervisor
            case PERF_CONTEXT_KERNEL:
                if (!isKernel) {
                    symbols = symbolTable(s_kernelPid);
                    isKernel = true;
                }
                break;
            case PERF_CONTEXT_USER:
                if (isKernel) {
                    symbols = symbolTable(m_currentUnwind.sample->pid());
                    isKernel = false;
                }
                break;
            default:
                qWarning() << "invalid callchain context" << ip;
                return;
            }
        }

        // sometimes it skips the first user frame.
        if (i == 0 && !isKernel && ip != m_currentUnwind.sample->ip()) {
            m_currentUnwind.frames.append(symbols->lookupFrame(
                                              m_currentUnwind.sample->ip(),
                                              m_currentUnwind.sample->time(), false,
                                              &m_currentUnwind.isInterworking));
        }

        if (ip <= PERF_CONTEXT_MAX) {
            m_currentUnwind.frames.append(symbols->lookupFrame(
                                              ip, m_currentUnwind.sample->time(), isKernel,
                                              &m_currentUnwind.isInterworking));
        }
    }
}

void PerfUnwind::sample(const PerfRecordSample &sample)
{
    m_sampleBuffer.append(sample);
    m_sampleBufferSize += sample.size();

    while (m_sampleBufferSize > s_maxSampleBufferSize) {
        const PerfRecordSample &sample = m_sampleBuffer.front();
        m_sampleBufferSize -= sample.size();
        analyze(sample);
        m_sampleBuffer.removeFirst();
    }
}

void PerfUnwind::analyze(const PerfRecordSample &sample)
{
    m_currentUnwind.isInterworking = false;
    m_currentUnwind.firstGuessedFrame = -1;
    m_currentUnwind.sample = &sample;
    m_currentUnwind.frames.clear();

    const bool isKernel = ipIsInKernelSpace(sample.ip());

    PerfSymbolTable *userSymbols = symbolTable(sample.pid());
    userSymbols->updatePerfMap();

    // Do this before any lookupFrame() calls; we want to clear the caches if timestamps reset.
    Dwfl *userDwfl = userSymbols->attachDwfl(sample.time(), &m_currentUnwind);
    if (sample.callchain().length() > 0)
        resolveCallchain();

    if (userDwfl && sample.registerAbi() != 0 && sample.userStack().length() > 0)
        unwindStack(userDwfl);

    // If nothing was found, at least look up the IP
    if (m_currentUnwind.frames.isEmpty()) {
        PerfSymbolTable *symbols = isKernel ? symbolTable(s_kernelPid) : userSymbols;
        m_currentUnwind.frames.append(symbols->lookupFrame(sample.ip(), sample.time(), isKernel,
                                                           &m_currentUnwind.isInterworking));
    }

    const quint8 numGuessedFrames = (m_currentUnwind.firstGuessedFrame == -1)
            ? 0 : m_currentUnwind.frames.length() - m_currentUnwind.firstGuessedFrame;
    QByteArray buffer;
    QDataStream(&buffer, QIODevice::WriteOnly)
            << static_cast<quint8>(Sample) << sample.pid()
            << sample.tid() << sample.time() << m_currentUnwind.frames
            << numGuessedFrames << m_attributeIds.value(sample.id(), -1);
    sendBuffer(buffer);
}

void PerfUnwind::fork(const PerfRecordFork &sample)
{
    QByteArray buffer;
    QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(ThreadStart)
                                               << sample.childPid() << sample.childTid()
                                               << sample.time();
    sendBuffer(buffer);
}

void PerfUnwind::exit(const PerfRecordExit &sample)
{
    QByteArray buffer;
    QDataStream(&buffer, QIODevice::WriteOnly) << static_cast<quint8>(ThreadEnd)
                                               << sample.childPid() << sample.childTid()
                                               << sample.time();
    sendBuffer(buffer);
}

// Sends a StringDefinition message that associates the given id with the given string.
void PerfUnwind::sendString(qint32 id, const QByteArray& string)
{
    QByteArray buffer;
    {
        // The stream is scoped so that it is gone before the buffer is sent.
        QDataStream stream(&buffer, QIODevice::WriteOnly);
        stream << static_cast<quint8>(StringDefinition);
        stream << id << string;
    }
    sendBuffer(buffer);
}

// Sends a LocationDefinition message that associates the given id with the given location.
void PerfUnwind::sendLocation(qint32 id, const PerfUnwind::Location &location)
{
    QByteArray buffer;
    {
        // The stream is scoped so that it is gone before the buffer is sent.
        QDataStream stream(&buffer, QIODevice::WriteOnly);
        stream << static_cast<quint8>(LocationDefinition);
        stream << id << location;
    }
    sendBuffer(buffer);
}

// Sends a SymbolDefinition message that associates the given id with the given symbol.
void PerfUnwind::sendSymbol(qint32 id, const PerfUnwind::Symbol &symbol)
{
    QByteArray buffer;
    {
        // The stream is scoped so that it is gone before the buffer is sent.
        QDataStream stream(&buffer, QIODevice::WriteOnly);
        stream << static_cast<quint8>(SymbolDefinition);
        stream << id << symbol;
    }
    sendBuffer(buffer);
}

// Returns the id of the given string, or -1 for an empty string. Strings seen for the first
// time get the next free id (the current number of known strings) and are sent to the output.
qint32 PerfUnwind::resolveString(const QByteArray& string)
{
    if (string.isEmpty())
        return -1;

    const auto known = m_strings.constFind(string);
    if (known != m_strings.constEnd())
        return known.value();

    const qint32 newId = m_strings.size();
    m_strings.insert(string, newId);
    sendString(newId, string);
    return newId;
}

// Returns the id of the given location if it is already known, -1 otherwise.
// Unlike resolveLocation() this never registers a new location.
int PerfUnwind::lookupLocation(const PerfUnwind::Location &location) const
{
    const auto known = m_locations.constFind(location);
    if (known == m_locations.constEnd())
        return -1;
    return known.value();
}

// Returns the id of the given location. Locations seen for the first time get the next free
// id (the current number of known locations) and are sent to the output.
int PerfUnwind::resolveLocation(const Location &location)
{
    const auto known = m_locations.constFind(location);
    if (known != m_locations.constEnd())
        return known.value();

    const int newId = m_locations.size();
    m_locations.insert(location, newId);
    sendLocation(newId, location);
    return newId;
}

// Returns true if a symbol has been registered for the given location id.
bool PerfUnwind::hasSymbol(int locationId) const
{
    const auto known = m_symbols.constFind(locationId);
    return known != m_symbols.constEnd();
}

// Registers the given symbol for the given location id and sends the definition to the
// output. No check is made here whether a symbol is already registered for that location;
// the definition is sent on every call.
void PerfUnwind::resolveSymbol(int locationId, const PerfUnwind::Symbol &symbol)
{
    // Remember the symbol locally, so that hasSymbol() reports it from now on.
    m_symbols.insert(locationId, symbol);
    sendSymbol(locationId, symbol);
}

// Returns the entry the kallsyms data (constructed from the kallsyms path) finds for the
// given address.
PerfKallsymEntry PerfUnwind::findKallsymEntry(quint64 address) const
{
    PerfKallsymEntry entry = m_kallsyms.findEntry(address);
    return entry;
}