summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2013-12-19 23:32:04 -0800
committerThe Qt Project <gerrit-noreply@qt-project.org>2014-01-23 01:06:34 +0100
commitea8e48a6799cf742ea23f4a30dcfc38a4988fe56 (patch)
tree3c8c09dd3ded200ac2d8f3f76752e0dc5c509dba /src/corelib
parent506c34fdd2b0a7adfd79b0a42b91f0bcc7b045ec (diff)
Update the qHash function for strings to use the CRC32 instruction
According to my profiling of Qt Creator, qHash and the SHA-1 calculation are the hottest spots remaining in QtCore. The current qHash function is not really vectorizable. We could come up with a different algorithm that is more SIMD-friendly, but since we have the CRC32 instruction that can read 32- and 64-bit entities, we're set. This commit also updates the benchmark for QHash and benchmarks both the hashing function itself and the QHash class. The updated benchmarks for the CRC32 on my machine shows that the hashing function is *always* improved, but the hashing isn't always. In particular, the current algorithm is better for the "numbers" case, for which the data sample differs in very few bits. The new code is 33% slower for that particular case. On average, the improvement (including the "numbers" case) is: compared to qHash only QHash Qt 5.0 function 2.54x 1.06x Qt 4.x function 4.34x 1.34x Java function 2.71x 1.11x Test machine: Sandybridge Core i7-2620M @ 2.66 GHz with turbo disabled for the benchmarks Change-Id: Ia80b98c0e20d785816f7a7f6ddf40b4b302c7297 Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@digia.com> Reviewed-by: Lars Knoll <lars.knoll@digia.com> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib')
-rw-r--r--src/corelib/tools/qhash.cpp63
1 files changed, 63 insertions, 0 deletions
diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp
index f14fac00b8..18d1cee0eb 100644
--- a/src/corelib/tools/qhash.cpp
+++ b/src/corelib/tools/qhash.cpp
@@ -60,6 +60,7 @@
#include <qbytearray.h>
#include <qdatetime.h>
#include <qbasicatomic.h>
+#include <private/qsimd_p.h>
#ifndef QT_BOOTSTRAPPED
#include <qcoreapplication.h>
@@ -93,10 +94,69 @@ QT_BEGIN_NAMESPACE
(for instance, gcc 4.4 does that even at -O0).
*/
+#ifdef __SSE4_2__
+static inline bool hasFastCrc32()
+{
+ return true;
+}
+
+template <typename Char>
+static uint crc32(const Char *ptr, size_t len, uint h)
+{
+ // The CRC32 instructions from Nehalem calculate a 32-bit CRC32 checksum
+ const uchar *p = reinterpret_cast<const uchar *>(ptr);
+ const uchar *const e = p + (len * sizeof(Char));
+# ifdef Q_PROCESSOR_X86_64
+ // The 64-bit instruction still calculates only 32-bit, but without this
+ // variable GCC 4.9 still tries to clear the high bits on every loop
+ qulonglong h2 = h;
+
+ p += 8;
+ for ( ; p <= e; p += 8)
+ h2 = _mm_crc32_u64(h2, *reinterpret_cast<const qlonglong *>(p - 8));
+ h = h2;
+ p -= 8;
+
+ len = e - p;
+ if (len & 4) {
+ h = _mm_crc32_u32(h, *reinterpret_cast<const uint *>(p));
+ p += 4;
+ }
+# else
+ p += 4;
+ for ( ; p <= e; p += 4)
+ h = _mm_crc32_u32(h, *reinterpret_cast<const uint *>(p));
+ p -= 4;
+ len = e - p;
+# endif
+ if (len & 2) {
+ h = _mm_crc32_u16(h, *reinterpret_cast<const ushort *>(p));
+ p += 2;
+ }
+ if (sizeof(Char) == 1 && len & 1)
+ h = _mm_crc32_u8(h, *p);
+ return h;
+}
+#else
+static inline bool hasFastCrc32()
+{
+ return false;
+}
+
+static uint crc32(...)
+{
+ Q_UNREACHABLE();
+ return 0;
+}
+#endif
+
static inline uint hash(const uchar *p, int len, uint seed) Q_DECL_NOTHROW
{
uint h = seed;
+ if (hasFastCrc32())
+ return crc32(p, size_t(len), h);
+
for (int i = 0; i < len; ++i)
h = 31 * h + p[i];
@@ -107,6 +167,9 @@ static inline uint hash(const QChar *p, int len, uint seed) Q_DECL_NOTHROW
{
uint h = seed;
+ if (hasFastCrc32())
+ return crc32(p, size_t(len), h);
+
for (int i = 0; i < len; ++i)
h = 31 * h + p[i].unicode();