summaryrefslogtreecommitdiffstats
path: root/tests/benchmarks/corelib/tools
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2024-02-02 22:15:56 -0800
committerThiago Macieira <thiago.macieira@intel.com>2024-03-12 18:23:09 -0700
commit55959aefab1a190435dfacfc2a136ce3314d423c (patch)
tree0577418cdae818acb80fdf5388232bdb2663080c /tests/benchmarks/corelib/tools
parent970aad541811d002e5004bd3826929247492ba09 (diff)
qHash: implement an AES hasher for QLatin1StringView
It's the same aeshash() as before, except we're passing a template parameter to indicate whether to read half and then zero-extend the data. That is, it will perform a conversion from Latin1 on the fly. When running in zero-extending mode, the length parameters are actually doubled (counting the number of UTF-16 code units) and we then divide again by 2 when advancing. The implementation should have the following performance characteristics: * QLatin1StringView now will be roughly half as fast as Qt 6.7 * QLatin1StringView now will be roughly as fast as QStringView For the aeshash128() in default builds of QtCore (will use SSE4.1), the long loop (32 characters or more) is: QStringView QLatin1StringView movdqu -0x20(%rax),%xmm4 | pmovzxbw -0x10(%rdx),%xmm2 movdqu -0x10(%rax),%xmm5 | pmovzxbw -0x8(%rdx),%xmm3 add $0x20,%rax | add $0x10,%rdx pxor %xmm4,%xmm0 | pxor %xmm2,%xmm0 pxor %xmm5,%xmm1 | pxor %xmm3,%xmm1 aesenc %xmm0,%xmm0 aesenc %xmm0,%xmm0 aesenc %xmm1,%xmm1 aesenc %xmm1,%xmm1 aesenc %xmm0,%xmm0 aesenc %xmm0,%xmm0 aesenc %xmm1,%xmm1 aesenc %xmm1,%xmm1 The number of instructions is identical, but there are actually 2 more uops per iteration. LLVM-MCA simulation shows this should execute in the same number of cycles on older CPUs that do not have support for VAES (see <https://analysis.godbolt.org/z/x95Mrfrf7>). For the VAES version in aeshash256() and the AVX10 version in aeshash256_256(): QStringView QLatin1StringView vpxor -0x40(%rax),%ymm1,%ym | vpmovzxbw -0x20(%rax),%ymm3 vpxor -0x20(%rax),%ymm0,%ym | vpmovzxbw -0x10(%rax),%ymm2 add $0x40,%rax | add $0x20,%rax | vpxor %ymm3,%ymm0,%ymm0 | vpxor %ymm2,%ymm1,%ymm1 vaesenc %ymm1,%ymm1,%ymm1 < vaesenc %ymm0,%ymm0,%ymm0 vaesenc %ymm0,%ymm0,%ymm0 vaesenc %ymm1,%ymm1,%ymm1 vaesenc %ymm1,%ymm1,%ymm1 vaesenc %ymm0,%ymm0,%ymm0 vaesenc %ymm0,%ymm0,%ymm0 > vaesenc %ymm1,%ymm1,%ymm1 In this case, the increase in number of instructions matches the increase in number of uops. The LLVM-MCA simulation says that the QLatin1StringView version is faster at 11 cycles/iteration vs 14 cyc/it (see <https://analysis.godbolt.org/z/1Gv1coz13>), but that can't be right. Measured performance of CPU cycles, on an Intel Core i9-7940X (Skylake, no VAES support), normalized on the QString performance (QByteArray is used as a stand-in for the performance in Qt 6.7): aeshash | siphash QByteArray QL1SV QString QByteArray QString dictionary 94.5% 79.7% 100.0% 150.5%* 159.8% paths-small 90.2% 93.2% 100.0% 202.8% 290.3% uuids 81.8% 100.7% 100.0% 215.2% 350.7% longstrings 42.5% 100.8% 100.0% 185.7% 353.2% numbers 95.5% 77.9% 100.0% 155.3%* 164.5% On an Intel Core i7-1165G7 (Tiger Lake, capable of VAES and AVX512VL): aeshash | siphash QByteArray QL1SV QString QByteArray QString dictionary 90.0% 91.1% 100.0% 103.3%* 157.1% paths-small 99.4% 104.8% 100.0% 237.5% 358.0% uuids 88.5% 117.6% 100.0% 274.5% 461.7% longstrings 57.4% 111.2% 100.0% 503.0% 974.3% numbers 90.6% 89.7% 100.0% 98.7%* 149.9% On an Intel 4th Generation Xeon Scalable Platinum (Sapphire Rapids, same Golden Cove core as Alder Lake): aeshash | siphash QByteArray QL1SV QString QByteArray QString dictionary 89.9% 102.1% 100.0% 158.1%* 172.7% paths-small 78.0% 89.4% 100.0% 159.4% 258.0% uuids 109.1% 107.9% 100.0% 279.0% 496.3% longstrings 52.1% 112.4% 100.0% 564.4% 1078.3% numbers 85.8% 98.9% 100.0% 152.6%* 190.4% * dictionary contains very short entries (6 characters) * paths-small contains strings of varying length, but very few over 32 * uuids-list contains fixed-length strings (38 characters) * longstrings is the same but 304 characters * numbers also a lot contains very short strings (1 to 6 chars) What this shows: * For short strings, the performance difference is negligible between all three * For longer strings, QLatin1StringView now costs between 7 and 17% more than QString on the tested machines instead of up to ~50% less, except on the older machine (where I think the main QString hashing is suffering from memory bandwidth limitations) * The AES hash implementation is anywhere from 1.6 to 11x faster than Siphash * Murmurhash (marked with asterisk) is much faster than Siphash, but it only managed to beat the AES hash in one test Change-Id: I664b9f014ffc48cbb49bfffd17b045c1811ac0ed Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org> Reviewed-by: MÃ¥rten Nordheim <marten.nordheim@qt.io>
Diffstat (limited to 'tests/benchmarks/corelib/tools')
-rw-r--r--tests/benchmarks/corelib/tools/qhash/outofline.cpp4
-rw-r--r--tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.cpp42
-rw-r--r--tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.h16
3 files changed, 51 insertions, 11 deletions
diff --git a/tests/benchmarks/corelib/tools/qhash/outofline.cpp b/tests/benchmarks/corelib/tools/qhash/outofline.cpp
index f44746c5b1..5b16c36ffb 100644
--- a/tests/benchmarks/corelib/tools/qhash/outofline.cpp
+++ b/tests/benchmarks/corelib/tools/qhash/outofline.cpp
@@ -5,7 +5,7 @@
QT_BEGIN_NAMESPACE
-size_t qHash(const Qt4String &str)
+size_t qHash(const Qt4String &str, size_t /* never used */)
{
qsizetype n = str.size();
const QChar *p = str.unicode();
@@ -40,7 +40,7 @@ size_t qHash(const Qt50String &key, size_t seed)
// Still, we can avoid writing the multiplication as "(h << 5) - h"
// -- the compiler will turn it into a shift and an addition anyway
// (for instance, gcc 4.4 does that even at -O0).
-size_t qHash(const JavaString &str)
+size_t qHash(const JavaString &str, size_t /* never used */)
{
const auto *p = reinterpret_cast<const char16_t *>(str.constData());
const qsizetype len = str.size();
diff --git a/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.cpp b/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.cpp
index f6214dfa36..1a62a48437 100644
--- a/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.cpp
+++ b/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.cpp
@@ -13,6 +13,8 @@
#include <QUuid>
#include <QTest>
+static constexpr quint64 RandomSeed32 = 1045982819;
+static constexpr quint64 RandomSeed64 = QtPrivate::QHashCombine{}(RandomSeed32, RandomSeed32);
class tst_QHash : public QObject
{
@@ -31,6 +33,8 @@ private slots:
void hashing_current_data() { data(); }
void hashing_current() { hashing_template<QString>(); }
+ void hashing_qbytearray_data() { data(); }
+ void hashing_qbytearray() { hashing_template<QByteArray>(); }
void hashing_qt50_data() { data(); }
void hashing_qt50() { hashing_template<Qt50String>(); }
void hashing_qt4_data() { data(); }
@@ -38,15 +42,25 @@ private slots:
void hashing_javaString_data() { data(); }
void hashing_javaString() { hashing_template<JavaString>(); }
+ void hashing_nonzero_current_data() { data(); }
+ void hashing_nonzero_current() { hashing_nonzero_template<QString>(); }
+ void hashing_nonzero_qbytearray_data() { data(); }
+ void hashing_nonzero_qbytearray() { hashing_nonzero_template<QByteArray>(); }
+ void hashing_nonzero_qlatin1string_data() { data(); }
+ void hashing_nonzero_qlatin1string() { hashing_nonzero_template<OwningLatin1String>(); }
+
private:
void data();
template <typename String> void qhash_template();
- template <typename String> void hashing_template();
+ template <typename String, size_t Seed = 0> void hashing_template();
+ template <typename String> void hashing_nonzero_template()
+ { hashing_template<String, size_t(RandomSeed64)>(); }
QStringList smallFilePaths;
QStringList uuids;
QStringList dict;
QStringList numbers;
+ QStringList longstrings;
};
///////////////////// QHash /////////////////////
@@ -68,10 +82,12 @@ void tst_QHash::initTestCase()
// guaranteed to be completely random, generated by http://xkcd.com/221/
QUuid ns = QUuid("{f43d2ef3-2fe9-4563-a6f5-5a0100c2d699}");
uuids.reserve(smallFilePaths.size());
+ longstrings.reserve(smallFilePaths.size());
foreach (const QString &path, smallFilePaths)
uuids.append(QUuid::createUuidV5(ns, path).toString());
-
+ for (qsizetype i = 0; i < uuids.size(); ++i)
+ longstrings.append(uuids.at(i).repeated(8));
// lots of strings with alphabetical characters, vaguely reminiscent of
// a dictionary.
@@ -112,6 +128,7 @@ void tst_QHash::data()
QTest::addColumn<QStringList>("items");
QTest::newRow("paths-small") << smallFilePaths;
QTest::newRow("uuids-list") << uuids;
+ QTest::newRow("longstrings-list") << longstrings;
QTest::newRow("dictionary") << dict;
QTest::newRow("numbers") << numbers;
}
@@ -132,19 +149,30 @@ template <typename String> void tst_QHash::qhash_template()
}
}
-template <typename String> void tst_QHash::hashing_template()
+template <typename String, size_t Seed> void tst_QHash::hashing_template()
{
// just the hashing function
QFETCH(QStringList, items);
QList<String> realitems;
realitems.reserve(items.size());
- foreach (const QString &s, items)
- realitems.append(s);
+ foreach (const QString &s, items) {
+ if constexpr (std::is_same_v<QString::value_type, typename String::value_type>) {
+ realitems.append(s);
+ } else if constexpr (sizeof(typename String::value_type) == 1) {
+ realitems.append(String(s.toLatin1()));
+ }
+ }
QBENCHMARK {
- for (int i = 0, n = realitems.size(); i != n; ++i)
- (void)qHash(realitems.at(i));
+ for (int i = 0, n = realitems.size(); i != n; ++i) {
+ volatile size_t h = qHash(realitems.at(i), Seed);
+ (void)h;
+#ifdef Q_CC_GNU
+ // "use" h
+ asm ("" : "+r" (h));
+#endif
+ }
}
}
diff --git a/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.h b/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.h
index 7d89f044c4..501b4a8b7f 100644
--- a/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.h
+++ b/tests/benchmarks/corelib/tools/qhash/tst_bench_qhash.h
@@ -1,8 +1,20 @@
// Copyright (C) 2016 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only
+#include <QHashFunctions>
#include <QString>
+struct OwningLatin1String : QByteArray
+{
+ OwningLatin1String() = default;
+ OwningLatin1String(const QByteArray &a) : QByteArray(a) {}
+ OwningLatin1String(QByteArray &&a) : QByteArray(std::move(a)) {}
+};
+QT_BEGIN_NAMESPACE
+inline size_t qHash(const OwningLatin1String &s, size_t seed = 0)
+{ return qHash(QLatin1StringView(s), seed); }
+QT_END_NAMESPACE
+
struct Qt4String : QString
{
Qt4String() {}
@@ -10,7 +22,7 @@ struct Qt4String : QString
};
QT_BEGIN_NAMESPACE
-size_t qHash(const Qt4String &);
+size_t qHash(const Qt4String &, size_t = 0);
QT_END_NAMESPACE
struct Qt50String : QString
@@ -31,6 +43,6 @@ struct JavaString : QString
};
QT_BEGIN_NAMESPACE
-size_t qHash(const JavaString &);
+size_t qHash(const JavaString &, size_t = 0);
QT_END_NAMESPACE