summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Milian Wolff <milian.wolff@kdab.com> 2016-02-23 19:07:37 +0100
committer: Milian Wolff <milian.wolff@kdab.com> 2016-03-03 09:55:56 +0000
commit: 8619214c5e76c70e32b47cd002be1adb1bc2f5bf (patch)
tree: e35e9af7783fc90171c531521c190cc16e03dfa0 /src
parent: fe04aba46df291389e462d9e89387a4e4547b33f (diff)
Optimize QMetaObject::activate.
The code is restructured to only loop over the non-empty connection lists. This can be checked early while the mutex is already locked, thus removing mutex lock/unlock calls that were previously done just to realize that the allsignals list is empty and can be skipped. Additionally, at the very end of the loop over the last signal connection list, the mutex was re-acquired even though this is not needed, as we will quit the loop anyway. This patch leverages these facts to remove those mutex locks, which alone has a considerable impact on the corresponding signal/slot benchmark. The instruction count goes down by ca. 13%, while cycle count and runtime drop by about 29%. Before: ********* Start testing of QObjectBenchmark ********* Config: Using QtTest library 5.7.0, Qt 5.7.0 (x86_64-little_endian-lp64 shared (dynamic) release build; by GCC 5.3.0) PASS : QObjectBenchmark::initTestCase() PASS : QObjectBenchmark::signal_slot_benchmark(simple function) RESULT : QObjectBenchmark::signal_slot_benchmark():"simple function": 0.00000362 msecs per iteration (total: 362, iterations: 100000000) 14.05652884 CPU cycles per iteration (total: 1,405,652,884, iterations: 100000000) 21.00585673 instructions per iteration (total: 2,100,585,673, iterations: 100000000) PASS : QObjectBenchmark::signal_slot_benchmark(single signal/slot) RESULT : QObjectBenchmark::signal_slot_benchmark():"single signal/slot": 0.00004709 msecs per iteration (total: 4,709, iterations: 100000000) 183.75943370 CPU cycles per iteration (total: 18,375,943,371, iterations: 100000000) 362.08604759 instructions per iteration (total: 36,208,604,760, iterations: 100000000) PASS : QObjectBenchmark::signal_slot_benchmark(multi signal/slot) RESULT : QObjectBenchmark::signal_slot_benchmark():"multi signal/slot": 0.00004965 msecs per iteration (total: 4,965, iterations: 100000000) 183.54556242 CPU cycles per iteration (total: 18,354,556,243, iterations: 100000000) 362.07734835 instructions per iteration (total: 36,207,734,835, 
iterations: 100000000) PASS : QObjectBenchmark::signal_slot_benchmark(unconnected signal) RESULT : QObjectBenchmark::signal_slot_benchmark():"unconnected signal": 0.00000752 msecs per iteration (total: 752, iterations: 100000000) 30.08781366 CPU cycles per iteration (total: 3,008,781,367, iterations: 100000000) 92.01520465 instructions per iteration (total: 9,201,520,466, iterations: 100000000) PASS : QObjectBenchmark::signal_slot_benchmark(single signal/ptr) RESULT : QObjectBenchmark::signal_slot_benchmark():"single signal/ptr": 0.00005620 msecs per iteration (total: 5,620, iterations: 100000000) 219.24739264 CPU cycles per iteration (total: 21,924,739,265, iterations: 100000000) 327.08675555 instructions per iteration (total: 32,708,675,556, iterations: 100000000) PASS : QObjectBenchmark::signal_slot_benchmark(functor) RESULT : QObjectBenchmark::signal_slot_benchmark():"functor": 0.00005852 msecs per iteration (total: 5,852, iterations: 100000000) 218.45401359 CPU cycles per iteration (total: 21,845,401,360, iterations: 100000000) 328.08472410 instructions per iteration (total: 32,808,472,410, iterations: 100000000) PASS : QObjectBenchmark::cleanupTestCase() Totals: 8 passed, 0 failed, 0 skipped, 0 blacklisted, 44469ms ********* Finished testing of QObjectBenchmark ********* After: ********* Start testing of QObjectBenchmark ********* Config: Using QtTest library 5.7.0, Qt 5.7.0 (x86_64-little_endian-lp64 shared (dynamic) release build; by GCC 5.3.0) PASS : QObjectBenchmark::initTestCase() PASS : QObjectBenchmark::signal_slot_benchmark(simple function) RESULT : QObjectBenchmark::signal_slot_benchmark():"simple function": 0.00000361 msecs per iteration (total: 361, iterations: 100000000) 14.01854817 CPU cycles per iteration (total: 1,401,854,818, iterations: 100000000) 21.00532932 instructions per iteration (total: 2,100,532,933, iterations: 100000000) PASS : QObjectBenchmark::signal_slot_benchmark(single signal/slot) RESULT : 
QObjectBenchmark::signal_slot_benchmark():"single signal/slot": 0.00003398 msecs per iteration (total: 3,398, iterations: 100000000) 132.52735104 CPU cycles per iteration (total: 13,252,735,104, iterations: 100000000) 314.04965106 instructions per iteration (total: 31,404,965,107, iterations: 100000000) PASS : QObjectBenchmark::signal_slot_benchmark(multi signal/slot) RESULT : QObjectBenchmark::signal_slot_benchmark():"multi signal/slot": 0.00003448 msecs per iteration (total: 3,448, iterations: 100000000) 133.63623046 CPU cycles per iteration (total: 13,363,623,046, iterations: 100000000) 314.04952237 instructions per iteration (total: 31,404,952,238, iterations: 100000000) PASS : QObjectBenchmark::signal_slot_benchmark(unconnected signal) RESULT : QObjectBenchmark::signal_slot_benchmark():"unconnected signal": 0.00000747 msecs per iteration (total: 747, iterations: 100000000) 29.02349389 CPU cycles per iteration (total: 2,902,349,390, iterations: 100000000) 92.01088221 instructions per iteration (total: 9,201,088,222, iterations: 100000000) PASS : QObjectBenchmark::signal_slot_benchmark(single signal/ptr) RESULT : QObjectBenchmark::signal_slot_benchmark():"single signal/ptr": 0.00004350 msecs per iteration (total: 4,350, iterations: 100000000) 167.83581885 CPU cycles per iteration (total: 16,783,581,885, iterations: 100000000) 279.06426656 instructions per iteration (total: 27,906,426,657, iterations: 100000000) PASS : QObjectBenchmark::signal_slot_benchmark(functor) RESULT : QObjectBenchmark::signal_slot_benchmark():"functor": 0.00004337 msecs per iteration (total: 4,337, iterations: 100000000) 170.45074743 CPU cycles per iteration (total: 17,045,074,743, iterations: 100000000) 280.06267229 instructions per iteration (total: 28,006,267,229, iterations: 100000000) PASS : QObjectBenchmark::cleanupTestCase() Totals: 8 passed, 0 failed, 0 skipped, 0 blacklisted, 33228ms ********* Finished testing of QObjectBenchmark ********* Change-Id: 
I6f79fd68ae7a07d9b439ca047bf1f53c83751d45 Reviewed-by: Olivier Goffart (Woboq GmbH) <ogoffart@woboq.com>
Diffstat (limited to 'src')
-rw-r--r-- src/corelib/kernel/qobject.cpp 49
1 files changed, 30 insertions, 19 deletions
diff --git a/src/corelib/kernel/qobject.cpp b/src/corelib/kernel/qobject.cpp
index e3e536d7e1..a6baff8a49 100644
--- a/src/corelib/kernel/qobject.cpp
+++ b/src/corelib/kernel/qobject.cpp
@@ -3662,15 +3662,31 @@ void QMetaObject::activate(QObject *sender, int signalOffset, int local_signal_i
return;
}
- const QObjectPrivate::ConnectionList *list;
- if (signal_index < connectionLists->count())
- list = &connectionLists->at(signal_index);
- else
- list = &connectionLists->allsignals;
+ // contains the non-empty connection lists
+ const QObjectPrivate::ConnectionList *lists[2];
+ int numLists = 0;
+ if (signal_index < connectionLists->count()) {
+ const auto *list = &connectionLists->at(signal_index);
+ if (list->first) // only add if non-empty
+ lists[numLists++] = list;
+ }
+ if (connectionLists->allsignals.first) // only add if non-empty
+ lists[numLists++] = &connectionLists->allsignals;
+
+ for (int i = 0; i < numLists; ++i) {
+ const auto *list = lists[i];
+ if (i == 0) {
+ // on the first iteration, the mutex must be locked already
+ Q_ASSERT(!locker.mutex()->tryLock());
+ } else {
+ // otherwise the mutex is unlocked and must be relocked
+ locker.relock();
+ if (connectionLists->orphaned)
+ break;
+ }
- do {
QObjectPrivate::Connection *c = list->first;
- if (!c) continue;
+ Q_ASSERT(c);
// We need to check against last here to ensure that signals added
// during the signal emission are not emitted in this emission.
QObjectPrivate::Connection *last = list->last;
@@ -3723,8 +3739,6 @@ void QMetaObject::activate(QObject *sender, int signalOffset, int local_signal_i
// destructor of the slot object might also lock a mutex from the signalSlotLock() mutex pool,
// and that would deadlock if the pool happens to return the same mutex.
obj.reset();
-
- locker.relock();
} else if (c->callFunction && c->method_offset <= receiver->metaObject()->methodOffset()) {
//we compare the vtable to make sure we are not in the destructor of the object.
const int methodIndex = c->method();
@@ -3738,7 +3752,6 @@ void QMetaObject::activate(QObject *sender, int signalOffset, int local_signal_i
if (qt_signal_spy_callback_set.slot_end_callback != 0)
qt_signal_spy_callback_set.slot_end_callback(receiver, methodIndex);
- locker.relock();
} else {
const int method = c->method_relative + c->method_offset;
locker.unlock();
@@ -3753,19 +3766,17 @@ void QMetaObject::activate(QObject *sender, int signalOffset, int local_signal_i
if (qt_signal_spy_callback_set.slot_end_callback != 0)
qt_signal_spy_callback_set.slot_end_callback(receiver, method);
-
- locker.relock();
}
- if (connectionLists->orphaned)
+ if (c == last) // early break without relock for the last signal
break;
- } while (c != last && (c = c->nextConnectionList) != 0);
- if (connectionLists->orphaned)
- break;
- } while (list != &connectionLists->allsignals &&
- //start over for all signals;
- ((list = &connectionLists->allsignals), true));
+ locker.relock();
+
+ if (connectionLists->orphaned)
+ break;
+ } while ((c = c->nextConnectionList) != 0);
+ }
}