diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2013-12-19 23:32:04 -0800 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2014-01-23 01:06:34 +0100 |
commit | ea8e48a6799cf742ea23f4a30dcfc38a4988fe56 (patch) | |
tree | 3c8c09dd3ded200ac2d8f3f76752e0dc5c509dba /src/corelib | |
parent | 506c34fdd2b0a7adfd79b0a42b91f0bcc7b045ec (diff) |
Update the qHash function for strings to use the CRC32 instruction
According to my profiling of Qt Creator, qHash and the SHA-1 calculation
are the hottest spots remaining in QtCore. The current qHash function is
not really vectorizable. We could come up with a different algorithm
that is more SIMD-friendly, but since we have the CRC32 instruction that
can read 32- and 64-bit entities, we're set.
This commit also updates the benchmark for QHash and benchmarks both
the hashing function itself and the QHash class. The updated
benchmarks for the CRC32 on my machine shows that the hashing function
is *always* improved, but the hashing isn't always. In particular, the
current algorithm is better for the "numbers" case, for which the data
sample differs in very few bits. The new code is 33% slower for that
particular case.
On average, the improvement (including the "numbers" case) is:
compared to qHash only QHash
Qt 5.0 function 2.54x 1.06x
Qt 4.x function 4.34x 1.34x
Java function 2.71x 1.11x
Test machine: Sandybridge Core i7-2620M @ 2.66 GHz with turbo disabled
for the benchmarks
Change-Id: Ia80b98c0e20d785816f7a7f6ddf40b4b302c7297
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@digia.com>
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- | src/corelib/tools/qhash.cpp | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp index f14fac00b8..18d1cee0eb 100644 --- a/src/corelib/tools/qhash.cpp +++ b/src/corelib/tools/qhash.cpp @@ -60,6 +60,7 @@ #include <qbytearray.h> #include <qdatetime.h> #include <qbasicatomic.h> +#include <private/qsimd_p.h> #ifndef QT_BOOTSTRAPPED #include <qcoreapplication.h> @@ -93,10 +94,69 @@ QT_BEGIN_NAMESPACE (for instance, gcc 4.4 does that even at -O0). */ +#ifdef __SSE4_2__ +static inline bool hasFastCrc32() +{ + return true; +} + +template <typename Char> +static uint crc32(const Char *ptr, size_t len, uint h) +{ + // The CRC32 instructions from Nehalem calculate a 32-bit CRC32 checksum + const uchar *p = reinterpret_cast<const uchar *>(ptr); + const uchar *const e = p + (len * sizeof(Char)); +# ifdef Q_PROCESSOR_X86_64 + // The 64-bit instruction still calculates only 32-bit, but without this + // variable GCC 4.9 still tries to clear the high bits on every loop + qulonglong h2 = h; + + p += 8; + for ( ; p <= e; p += 8) + h2 = _mm_crc32_u64(h2, *reinterpret_cast<const qlonglong *>(p - 8)); + h = h2; + p -= 8; + + len = e - p; + if (len & 4) { + h = _mm_crc32_u32(h, *reinterpret_cast<const uint *>(p)); + p += 4; + } +# else + p += 4; + for ( ; p <= e; p += 4) + h = _mm_crc32_u32(h, *reinterpret_cast<const uint *>(p)); + p -= 4; + len = e - p; +# endif + if (len & 2) { + h = _mm_crc32_u16(h, *reinterpret_cast<const ushort *>(p)); + p += 2; + } + if (sizeof(Char) == 1 && len & 1) + h = _mm_crc32_u8(h, *p); + return h; +} +#else +static inline bool hasFastCrc32() +{ + return false; +} + +static uint crc32(...) +{ + Q_UNREACHABLE(); + return 0; +} +#endif + static inline uint hash(const uchar *p, int len, uint seed) Q_DECL_NOTHROW { uint h = seed; + if (hasFastCrc32()) + return crc32(p, size_t(len), h); + for (int i = 0; i < len; ++i) h = 31 * h + p[i]; @@ -107,6 +167,9 @@ static inline uint hash(const QChar *p, int len, uint seed) Q_DECL_NOTHROW { uint h = seed; + if (hasFastCrc32()) + return crc32(p, size_t(len), h); + for (int i = 0; i < len; ++i) h = 31 * h + p[i].unicode(); |