summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2024-02-02 20:00:33 -0800
committerThiago Macieira <thiago.macieira@intel.com>2024-03-12 17:23:09 -0800
commit970aad541811d002e5004bd3826929247492ba09 (patch)
tree0b2a531dd7ea4a5f80f3b8e6e5475f7983746307 /src/corelib/tools
parent9a2e21174a08cf282cb933c95380abd900102ae0 (diff)
qHash: implement chunked hashing of QLatin1StringView
So that it hashes to the same value as QString{,View}. In order to test this, you must either run on a CPU other than ARM and x86, or disable the AES hasher. I did that and can confirm siphash and murmurhash do work with on-the-fly conversion from Latin-1. Change-Id: I664b9f014ffc48cbb49bfffd17b03e5e62ec4e89 Reviewed-by: MÃ¥rten Nordheim <marten.nordheim@qt.io>
Diffstat (limited to 'src/corelib/tools')
-rw-r--r--src/corelib/tools/qhash.cpp85
1 files changed, 78 insertions, 7 deletions
diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp
index 862ebca414..591a1ca1c6 100644
--- a/src/corelib/tools/qhash.cpp
+++ b/src/corelib/tools/qhash.cpp
@@ -49,6 +49,8 @@
QT_BEGIN_NAMESPACE
+void qt_from_latin1(char16_t *dst, const char *str, size_t size) noexcept; // qstring.cpp
+
// We assume that pointers and size_t have the same size. If that assumption should fail
// on a platform the code selecting the different methods below needs to be fixed.
static_assert(sizeof(size_t) == QT_POINTER_SIZE, "size_t and pointers have different size.");
@@ -523,6 +525,52 @@ static size_t siphash(const uint8_t *in, size_t inlen, size_t seed, size_t seed2
return hasher.finalize(in + (inlen & ~TailSizeMask), inlen & TailSizeMask);
}
+enum ZeroExtension {
+ None = 0,
+ ByteToWord = 1,
+};
+
+template <ZeroExtension = None> static size_t
+qHashBits_fallback(const uchar *p, size_t size, size_t seed, size_t seed2) noexcept;
+template <> size_t qHashBits_fallback<None>(const uchar *p, size_t size, size_t seed, size_t seed2) noexcept
+{
+ if (size <= QT_POINTER_SIZE)
+ return murmurhash(p, size, seed);
+
+ return siphash(reinterpret_cast<const uchar *>(p), size, seed, seed2);
+}
+
+template <> size_t qHashBits_fallback<ByteToWord>(const uchar *data, size_t size, size_t seed, size_t seed2) noexcept
+{
+ auto quick_from_latin1 = [](char16_t *dest, const uchar *data, size_t size) {
+ // Quick, "inlined" version for very short blocks
+ std::copy_n(data, size, dest);
+ };
+ if (size <= QT_POINTER_SIZE / 2) {
+ std::array<char16_t, QT_POINTER_SIZE / 2> buf;
+ quick_from_latin1(buf.data(), data, size);
+ return murmurhash(buf.data(), size * 2, seed);
+ }
+
+ constexpr size_t TailSizeMask = sizeof(void *) / 2 - 1;
+ std::array<char16_t, 256> buf;
+ SipHash<> siphash(size * 2, seed, seed2);
+ ptrdiff_t offset = 0;
+ for ( ; offset + buf.size() < size; offset += buf.size()) {
+ qt_from_latin1(buf.data(), reinterpret_cast<const char *>(data) + offset, buf.size());
+ siphash.addBlock(reinterpret_cast<uint8_t *>(buf.data()), sizeof(buf));
+ }
+ if (size_t n = size - offset; n > TailSizeMask) {
+ n &= ~TailSizeMask;
+ qt_from_latin1(buf.data(), reinterpret_cast<const char *>(data) + offset, n);
+ siphash.addBlock(reinterpret_cast<uint8_t *>(buf.data()), n * 2);
+ offset += n;
+ }
+
+ quick_from_latin1(buf.data(), data + offset, size - offset);
+ return siphash.finalize(reinterpret_cast<uint8_t *>(buf.data()), (size - offset) * 2);
+}
+
#if defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__) // GCC
# define QHASH_AES_SANITIZER_BUILD
#elif __has_feature(address_sanitizer) || __has_feature(thread_sanitizer) // Clang
@@ -978,18 +1026,17 @@ size_t qHashBits(const void *p, size_t size, size_t seed) noexcept
size_t seed2 = size;
if (seed)
seed2 = qt_qhash_seed.currentSeed(1);
+
+ auto data = reinterpret_cast<const uchar *>(p);
#ifdef AESHASH
if (seed && qCpuHasFeature(AES) && qCpuHasFeature(SSE4_2))
- return aeshash(reinterpret_cast<const uchar *>(p), size, seed, seed2);
+ return aeshash(data, size, seed, seed2);
#elif defined(Q_PROCESSOR_ARM) && QT_COMPILER_SUPPORTS_HERE(AES) && !defined(QHASH_AES_SANITIZER_BUILD) && !defined(QT_BOOTSTRAPPED)
if (seed && qCpuHasFeature(AES))
- return aeshash(reinterpret_cast<const uchar *>(p), size, seed, seed2);
+ return aeshash(data, size, seed, seed2);
#endif
- if (size <= QT_POINTER_SIZE)
- return murmurhash(p, size, seed);
-
- return siphash(reinterpret_cast<const uchar *>(p), size, seed, seed2);
+ return qHashBits_fallback<>(data, size, seed, seed2);
}
size_t qHash(QByteArrayView key, size_t seed) noexcept
@@ -1019,7 +1066,31 @@ size_t qHash(const QBitArray &bitArray, size_t seed) noexcept
size_t qHash(QLatin1StringView key, size_t seed) noexcept
{
- return qHashBits(reinterpret_cast<const uchar *>(key.data()), size_t(key.size()), seed);
+#ifdef QT_BOOTSTRAPPED
+ // the seed is always 0 in bootstrapped mode (no seed generation code),
+ // so help the compiler do dead code elimination
+ seed = 0;
+ constexpr bool Qt6DeterministicHash = true;
+#else
+ constexpr bool Qt6DeterministicHash = QT_VERSION_MAJOR == 6;
+#endif
+
+ auto data = reinterpret_cast<const uchar *>(key.data());
+ size_t size = key.size();
+
+ if (seed == 0 && Qt6DeterministicHash) {
+ // fall back to what we used to use prior to Qt 6.8
+ return qHashBits(data, size, seed);
+ }
+
+ // mix in the length as a secondary seed. For seed == 0, seed2 must be
+ // size, to match what we used to do prior to Qt 6.2.
+ // Multiplied by 2 to match the byte size of the equiavlent UTF-16 string.
+ size_t seed2 = size * 2;
+ if (seed)
+ seed2 = qt_qhash_seed.currentSeed(1);
+
+ return qHashBits_fallback<ByteToWord>(data, size, seed, seed2);
}
/*!