From ea8e48a6799cf742ea23f4a30dcfc38a4988fe56 Mon Sep 17 00:00:00 2001
From: Thiago Macieira
Date: Thu, 19 Dec 2013 23:32:04 -0800
Subject: Update the qHash function for strings to use the CRC32 instruction

According to my profiling of Qt Creator, qHash and the SHA-1 calculation
are the hottest spots remaining in QtCore. The current qHash function is
not really vectorizable. We could come up with a different algorithm that
is more SIMD-friendly, but since we have the CRC32 instruction that can
read 32- and 64-bit entities, we're set.

This commit also updates the benchmark for QHash and benchmarks both the
hashing function itself and the QHash class.

The updated benchmarks for the CRC32 on my machine show that the hashing
function is *always* improved, but QHash as a whole isn't always. In
particular, the current algorithm is better for the "numbers" case, for
which the data sample differs in very few bits. The new code is 33% slower
for that particular case.

On average, the improvement (including the "numbers" case) is:

 compared to         qHash only   QHash
 Qt 5.0 function       2.54x      1.06x
 Qt 4.x function       4.34x      1.34x
 Java function         2.71x      1.11x

Test machine: Sandybridge Core i7-2620M @ 2.66 GHz with turbo disabled
for the benchmarks

Change-Id: Ia80b98c0e20d785816f7a7f6ddf40b4b302c7297
Reviewed-by: Oswald Buddenhagen
Reviewed-by: Lars Knoll
Reviewed-by: Thiago Macieira
---
 tests/benchmarks/corelib/tools/qhash/main.cpp      | 42 ++++++++++++++--------
 tests/benchmarks/corelib/tools/qhash/main.h        | 10 ++++++
 tests/benchmarks/corelib/tools/qhash/outofline.cpp | 10 ++++++
 3 files changed, 48 insertions(+), 14 deletions(-)

(limited to 'tests')

diff --git a/tests/benchmarks/corelib/tools/qhash/main.cpp b/tests/benchmarks/corelib/tools/qhash/main.cpp
index a39ced19fe..b173724aed 100644
--- a/tests/benchmarks/corelib/tools/qhash/main.cpp
+++ b/tests/benchmarks/corelib/tools/qhash/main.cpp
@@ -55,13 +55,28 @@ class tst_QHash : public QObject
 
 private slots:
     void initTestCase();
+    void 
qhash_current_data() { data(); }
+    void qhash_current() { qhash_template<QString>(); }
+    void qhash_qt50_data() { data(); }
+    void qhash_qt50() { qhash_template<Qt50String>(); }
     void qhash_qt4_data() { data(); }
-    void qhash_qt4();
-    void javaString_data() { data(); }
-    void javaString();
+    void qhash_qt4() { qhash_template<Qt4String>(); }
+    void qhash_javaString_data() { data(); }
+    void qhash_javaString() { qhash_template<JavaString>(); }
+
+    void hashing_current_data() { data(); }
+    void hashing_current() { hashing_template<QString>(); }
+    void hashing_qt50_data() { data(); }
+    void hashing_qt50() { hashing_template<Qt50String>(); }
+    void hashing_qt4_data() { data(); }
+    void hashing_qt4() { hashing_template<Qt4String>(); }
+    void hashing_javaString_data() { data(); }
+    void hashing_javaString() { hashing_template<JavaString>(); }
 
 private:
     void data();
+    template <typename String> void qhash_template();
+    template <typename String> void hashing_template();
 
     QStringList smallFilePaths;
     QStringList uuids;
@@ -76,7 +91,7 @@ private:
 void tst_QHash::initTestCase()
 {
     // small list of file paths
-    QFile smallPathsData("paths_small_data.txt");
+    QFile smallPathsData(QFINDTESTDATA("paths_small_data.txt"));
     QVERIFY(smallPathsData.open(QIODevice::ReadOnly));
     smallFilePaths = QString::fromLatin1(smallPathsData.readAll()).split(QLatin1Char('\n'));
     QVERIFY(!smallFilePaths.isEmpty());
@@ -133,12 +148,12 @@ void tst_QHash::data()
     QTest::newRow("numbers") << numbers;
 }
 
-void tst_QHash::qhash_qt4()
+template <typename String> void tst_QHash::qhash_template()
 {
     QFETCH(QStringList, items);
-    QHash<Qt4String, int> hash;
+    QHash<String, int> hash;
 
-    QList<Qt4String> realitems;
+    QList<String> realitems;
     foreach (const QString &s, items)
         realitems.append(s);
 
@@ -149,23 +164,22 @@ void tst_QHash::qhash_qt4()
     }
 }
 
-void tst_QHash::javaString()
+template <typename String> void tst_QHash::hashing_template()
 {
+    // just the hashing function
     QFETCH(QStringList, items);
-    QHash<JavaString, int> hash;
 
-    QList<JavaString> realitems;
+    QVector<String> realitems;
+    realitems.reserve(items.size());
     foreach (const QString &s, items)
         realitems.append(s);
 
     QBENCHMARK {
-        for (int i = 0, n = realitems.size(); i != n; ++i) {
-            
hash[realitems.at(i)] = i;
-        }
+        for (int i = 0, n = realitems.size(); i != n; ++i)
+            (void)qHash(realitems.at(i));
     }
 }
 
-
 QTEST_MAIN(tst_QHash)
 
 #include "main.moc"
diff --git a/tests/benchmarks/corelib/tools/qhash/main.h b/tests/benchmarks/corelib/tools/qhash/main.h
index bd3f0db12d..86a1a3d09b 100644
--- a/tests/benchmarks/corelib/tools/qhash/main.h
+++ b/tests/benchmarks/corelib/tools/qhash/main.h
@@ -51,6 +51,16 @@ QT_BEGIN_NAMESPACE
 uint qHash(const Qt4String &);
 QT_END_NAMESPACE
 
+struct Qt50String : QString
+{
+    Qt50String() {}
+    Qt50String(const QString &s) : QString(s) {}
+};
+
+QT_BEGIN_NAMESPACE
+uint qHash(const Qt50String &, uint seed = 0);
+QT_END_NAMESPACE
+
 
 struct JavaString : QString
 {
diff --git a/tests/benchmarks/corelib/tools/qhash/outofline.cpp b/tests/benchmarks/corelib/tools/qhash/outofline.cpp
index 9ccfc11224..3a2278503d 100644
--- a/tests/benchmarks/corelib/tools/qhash/outofline.cpp
+++ b/tests/benchmarks/corelib/tools/qhash/outofline.cpp
@@ -57,6 +57,16 @@ uint qHash(const Qt4String &str)
     return h;
 }
 
+uint qHash(const Qt50String &key, uint seed)
+{
+    const QChar *p = key.unicode();
+    int len = key.size();
+    uint h = seed;
+    for (int i = 0; i < len; ++i)
+        h = 31 * h + p[i].unicode();
+    return h;
+}
+
 // The Java's hashing algorithm for strings is a variation of D. J. Bernstein
 // hashing algorithm appeared here http://cr.yp.to/cdb/cdb.txt
 // and informally known as DJB33XX - DJB's 33 Times Xor.
-- 
cgit v1.2.3