summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qhash.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qhash.cpp')
-rw-r--r--src/corelib/tools/qhash.cpp241
1 files changed, 102 insertions, 139 deletions
diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp
index 4b3996d432..737033261e 100644
--- a/src/corelib/tools/qhash.cpp
+++ b/src/corelib/tools/qhash.cpp
@@ -71,184 +71,141 @@
QT_BEGIN_NAMESPACE
/*
- The Java's hashing algorithm for strings is a variation of D. J. Bernstein
- hashing algorithm appeared here http://cr.yp.to/cdb/cdb.txt
- and informally known as DJB33XX - DJB's 33 Times Xor.
- Java uses DJB31XA, that is, 31 Times Add.
-
- The original algorithm was a loop around
- (h << 5) + h ^ c
- (which is indeed h*33 ^ c); it was then changed to
- (h << 5) - h ^ c
- (so h*31^c: DJB31XX), and the XOR changed to a sum:
- (h << 5) - h + c
- (DJB31XA), which can save some assembly instructions.
-
- Still, we can avoid writing the multiplication as "(h << 5) - h"
- -- the compiler will turn it into a shift and an addition anyway
- (for instance, gcc 4.4 does that even at -O0).
-*/
-
-#if QT_COMPILER_SUPPORTS_HERE(SSE4_2)
-static inline bool hasFastCrc32()
+ * Hashing for memory segments is based on the public domain MurmurHash2 by
+ * Austin Appleby. See http://murmurhash.googlepages.com/
+ */
+static inline uint hash(const void *key, uint len, uint seed) noexcept
{
- return qCpuHasFeature(SSE4_2);
-}
+ // 'm' and 'r' are mixing constants generated offline.
+ // They're not really 'magic', they just happen to work well.
-template <typename Char>
-QT_FUNCTION_TARGET(SSE4_2)
-static uint crc32(const Char *ptr, size_t len, uint h)
-{
- // The CRC32 instructions from Nehalem calculate a 32-bit CRC32 checksum
- const uchar *p = reinterpret_cast<const uchar *>(ptr);
- const uchar *const e = p + (len * sizeof(Char));
-# ifdef Q_PROCESSOR_X86_64
- // The 64-bit instruction still calculates only 32-bit, but without this
- // variable GCC 4.9 still tries to clear the high bits on every loop
- qulonglong h2 = h;
-
- p += 8;
- for ( ; p <= e; p += 8)
- h2 = _mm_crc32_u64(h2, qFromUnaligned<qlonglong>(p - 8));
- h = h2;
- p -= 8;
-
- len = e - p;
- if (len & 4) {
- h = _mm_crc32_u32(h, qFromUnaligned<uint>(p));
- p += 4;
- }
-# else
- p += 4;
- for ( ; p <= e; p += 4)
- h = _mm_crc32_u32(h, qFromUnaligned<uint>(p - 4));
- p -= 4;
- len = e - p;
-# endif
- if (len & 2) {
- h = _mm_crc32_u16(h, qFromUnaligned<ushort>(p));
- p += 2;
+ const unsigned int m = 0x5bd1e995;
+ const int r = 24;
+
+ // Initialize the hash to a 'random' value
+
+ unsigned int h = seed ^ len;
+
+ // Mix 4 bytes at a time into the hash
+
+ const unsigned char *data = reinterpret_cast<const unsigned char *>(key);
+ const unsigned char *end = data + (len & ~3);
+
+ while (data != end) {
+ size_t k;
+ memcpy(&k, data, sizeof(uint));
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h *= m;
+ h ^= k;
+
+ data += 4;
}
- if (sizeof(Char) == 1 && len & 1)
- h = _mm_crc32_u8(h, *p);
- return h;
-}
-#elif defined(__ARM_FEATURE_CRC32)
-static inline bool hasFastCrc32()
-{
- return qCpuHasFeature(CRC32);
-}
-template <typename Char>
-#if defined(Q_PROCESSOR_ARM_64)
-QT_FUNCTION_TARGET(CRC32)
-#endif
-static uint crc32(const Char *ptr, size_t len, uint h)
-{
- // The crc32[whbd] instructions on Aarch64/Aarch32 calculate a 32-bit CRC32 checksum
- const uchar *p = reinterpret_cast<const uchar *>(ptr);
- const uchar *const e = p + (len * sizeof(Char));
-
-#ifndef __ARM_FEATURE_UNALIGNED
- if (Q_UNLIKELY(reinterpret_cast<quintptr>(p) & 7)) {
- if ((sizeof(Char) == 1) && (reinterpret_cast<quintptr>(p) & 1) && (e - p > 0)) {
- h = __crc32b(h, *p);
- ++p;
- }
- if ((reinterpret_cast<quintptr>(p) & 2) && (e >= p + 2)) {
- h = __crc32h(h, *reinterpret_cast<const uint16_t *>(p));
- p += 2;
- }
- if ((reinterpret_cast<quintptr>(p) & 4) && (e >= p + 4)) {
- h = __crc32w(h, *reinterpret_cast<const uint32_t *>(p));
- p += 4;
+ // Handle the last few bytes of the input array
+ len &= 3;
+ if (len) {
+ unsigned int k = 0;
+ end += len;
+
+ while (data != end) {
+ k <<= 8;
+ k |= *data;
+ ++data;
}
+ h ^= k;
+ h *= m;
}
-#endif
- for ( ; p + 8 <= e; p += 8)
- h = __crc32d(h, *reinterpret_cast<const uint64_t *>(p));
+ // Do a few final mixes of the hash to ensure the last few
+ // bytes are well-incorporated.
+
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
- len = e - p;
- if (len == 0)
- return h;
- if (len & 4) {
- h = __crc32w(h, *reinterpret_cast<const uint32_t *>(p));
- p += 4;
- }
- if (len & 2) {
- h = __crc32h(h, *reinterpret_cast<const uint16_t *>(p));
- p += 2;
- }
- if (sizeof(Char) == 1 && len & 1)
- h = __crc32b(h, *p);
return h;
}
-#else
-static inline bool hasFastCrc32()
-{
- return false;
-}
-static uint crc32(...)
+static inline uint64_t hash(const void *key, uint64_t len, uint64_t seed) noexcept
{
- Q_UNREACHABLE();
- return 0;
-}
-#endif
+ const uint64_t m = 0xc6a4a7935bd1e995ULL;
+ const int r = 47;
-static inline uint hash(const uchar *p, size_t len, uint seed) noexcept
-{
- uint h = seed;
+ uint64_t h = seed ^ (len * m);
- if (seed && hasFastCrc32())
- return crc32(p, len, h);
+ const unsigned char *data = reinterpret_cast<const unsigned char *>(key);
+ const unsigned char *end = data + (len & ~7ul);
- for (size_t i = 0; i < len; ++i)
- h = 31 * h + p[i];
+ while (data != end) {
+ uint64_t k;
+ memcpy(&k, data, sizeof(uint64_t));
- return h;
-}
+ k *= m;
+ k ^= k >> r;
+ k *= m;
-static inline uint hash(const QChar *p, size_t len, uint seed) noexcept
-{
- uint h = seed;
+ h ^= k;
+ h *= m;
+
+ data += 8;
+ }
- if (seed && hasFastCrc32())
- return crc32(p, len, h);
+ len &= 7;
+ if (len) {
+ // handle the last few bytes of input
+ size_t k = 0;
+ end += len;
- for (size_t i = 0; i < len; ++i)
- h = 31 * h + p[i].unicode();
+ while (data != end) {
+ k <<= 8;
+ k |= *data;
+ ++data;
+ }
+ h ^= k;
+ h *= m;
+ }
+
+ h ^= h >> r;
+ h *= m;
+ h ^= h >> r;
return h;
}
size_t qHashBits(const void *p, size_t size, size_t seed) noexcept
{
- return hash(static_cast<const uchar*>(p), int(size), static_cast<uint>(seed));
+ size_t result;
+ if constexpr (sizeof(size_t) == 8)
+ result = hash(p, uint64_t(size), uint64_t(seed));
+ else
+ result = hash(p, uint(size), uint(seed));
+ return result;
}
-
size_t qHash(const QByteArray &key, size_t seed) noexcept
{
- return hash(reinterpret_cast<const uchar *>(key.constData()), size_t(key.size()), seed);
+ return qHashBits(key.constData(), size_t(key.size()), seed);
}
#if QT_STRINGVIEW_LEVEL < 2
size_t qHash(const QString &key, size_t seed) noexcept
{
- return hash(key.unicode(), size_t(key.size()), seed);
+ return qHashBits(key.unicode(), size_t(key.size())*sizeof(QChar), seed);
}
size_t qHash(const QStringRef &key, size_t seed) noexcept
{
- return hash(key.unicode(), size_t(key.size()), seed);
+ return qHashBits(key.unicode(), size_t(key.size())*sizeof(QChar), seed);
}
#endif
size_t qHash(QStringView key, size_t seed) noexcept
{
- return hash(key.data(), key.size(), seed);
+ return qHashBits(key.data(), key.size()*sizeof(QChar), seed);
}
size_t qHash(const QBitArray &bitArray, size_t seed) noexcept
@@ -266,7 +223,7 @@ size_t qHash(const QBitArray &bitArray, size_t seed) noexcept
size_t qHash(QLatin1String key, size_t seed) noexcept
{
- return hash(reinterpret_cast<const uchar *>(key.data()), size_t(key.size()), seed);
+ return qHashBits(reinterpret_cast<const uchar *>(key.data()), size_t(key.size()), seed);
}
/*!
@@ -610,7 +567,9 @@ uint qt_hash(QStringView key, uint chained) noexcept
*/
size_t qHash(float key, size_t seed) noexcept
{
- return key != 0.0f ? hash(reinterpret_cast<const uchar *>(&key), sizeof(key), seed) : seed ;
+ // ensure -0 gets mapped to 0
+ key += 0.0f;
+ return qHashBits(&key, sizeof(key), seed);
}
/*! \relates QHash
@@ -620,7 +579,9 @@ size_t qHash(float key, size_t seed) noexcept
*/
size_t qHash(double key, size_t seed) noexcept
{
- return key != 0.0 ? hash(reinterpret_cast<const uchar *>(&key), sizeof(key), seed) : seed ;
+ // ensure -0 gets mapped to 0
+ key += 0.0;
+ return qHashBits(&key, sizeof(key), seed);
}
#if !defined(Q_OS_DARWIN) || defined(Q_CLANG_QDOC)
@@ -631,7 +592,9 @@ size_t qHash(double key, size_t seed) noexcept
*/
size_t qHash(long double key, size_t seed) noexcept
{
- return key != 0.0L ? hash(reinterpret_cast<const uchar *>(&key), sizeof(key), seed) : seed ;
+ // ensure -0 gets mapped to 0
+ key += static_cast<long double>(0.0);
+ return qHashBits(&key, sizeof(key), seed);
}
#endif