summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2016-10-27 22:57:42 -0700
committerLars Knoll <lars.knoll@qt.io>2020-04-09 20:03:52 +0200
commit09bdf907cfc41fc20c809c26a5c79cf3b4bf3b14 (patch)
tree25836ce624f0c92aff66a6b9da03345d54720137
parentf14559790b95506b1e3231ee6fc1d95b730d572c (diff)
Replace Qt's hashing function with SipHash
This commit replaces MurmurHash with SipHash for all strings longer than the size of a pointer. The most important difference between those algorithms is that MurmurHash has this unwelcome property: for two byte sequences x and y, if you know that x and y have the same hashing for a given seed, then they have the same hashing for all seeds. SipHash has no such issue. If the seed changes, the strings that used to compute to the same hash are no longer likely to do so. We've chosen to implement a SipHash-1-2 algorithm instead of the regular 2-4 as that has roughly the same performance as the old DJB33XA algorithm. It's around 50% slower than MurmurHash, which is acceptable given the added security. Task-number: QTBUG-47566 Change-Id: I09100678ff4443e6be06fffd14819c8878d223e2 Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
-rw-r--r--src/corelib/tools/LICENSE.siphash116
-rw-r--r--src/corelib/tools/qhash.cpp231
-rw-r--r--src/corelib/tools/qt_attribution.json15
3 files changed, 351 insertions, 11 deletions
diff --git a/src/corelib/tools/LICENSE.siphash b/src/corelib/tools/LICENSE.siphash
new file mode 100644
index 0000000000..670154e353
--- /dev/null
+++ b/src/corelib/tools/LICENSE.siphash
@@ -0,0 +1,116 @@
+CC0 1.0 Universal
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator and
+subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for the
+purpose of contributing to a commons of creative, cultural and scientific
+works ("Commons") that the public can reliably and without fear of later
+claims of infringement build upon, modify, incorporate in other works, reuse
+and redistribute as freely as possible in any form whatsoever and for any
+purposes, including without limitation commercial purposes. These owners may
+contribute to the Commons to promote the ideal of a free culture and the
+further production of creative, cultural and scientific works, or to gain
+reputation or greater distribution for their Work in part through the use and
+efforts of others.
+
+For these and/or other purposes and motivations, and without any expectation
+of additional consideration or compensation, the person associating CC0 with a
+Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
+and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
+and publicly distribute the Work under its terms, with knowledge of his or her
+Copyright and Related Rights in the Work and the meaning and intended legal
+effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not limited
+to, the following:
+
+ i. the right to reproduce, adapt, distribute, perform, display, communicate,
+ and translate a Work;
+
+ ii. moral rights retained by the original author(s) and/or performer(s);
+
+ iii. publicity and privacy rights pertaining to a person's image or likeness
+ depicted in a Work;
+
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+
+ v. rights protecting the extraction, dissemination, use and reuse of data in
+ a Work;
+
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation thereof,
+ including any amended or successor version of such directive); and
+
+ vii. other similar, equivalent or corresponding rights throughout the world
+ based on applicable law or treaty, and any national implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention of,
+applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
+unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
+and Related Rights and associated claims and causes of action, whether now
+known or unknown (including existing as well as future claims and causes of
+action), in the Work (i) in all territories worldwide, (ii) for the maximum
+duration provided by applicable law or treaty (including future time
+extensions), (iii) in any current or future medium and for any number of
+copies, and (iv) for any purpose whatsoever, including without limitation
+commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
+the Waiver for the benefit of each member of the public at large and to the
+detriment of Affirmer's heirs and successors, fully intending that such Waiver
+shall not be subject to revocation, rescission, cancellation, termination, or
+any other legal or equitable action to disrupt the quiet enjoyment of the Work
+by the public as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason be
+judged legally invalid or ineffective under applicable law, then the Waiver
+shall be preserved to the maximum extent permitted taking into account
+Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
+is so judged Affirmer hereby grants to each affected person a royalty-free,
+non transferable, non sublicensable, non exclusive, irrevocable and
+unconditional license to exercise Affirmer's Copyright and Related Rights in
+the Work (i) in all territories worldwide, (ii) for the maximum duration
+provided by applicable law or treaty (including future time extensions), (iii)
+in any current or future medium and for any number of copies, and (iv) for any
+purpose whatsoever, including without limitation commercial, advertising or
+promotional purposes (the "License"). The License shall be deemed effective as
+of the date CC0 was applied by Affirmer to the Work. Should any part of the
+License for any reason be judged legally invalid or ineffective under
+applicable law, such partial invalidity or ineffectiveness shall not
+invalidate the remainder of the License, and in such case Affirmer hereby
+affirms that he or she will not (i) exercise any of his or her remaining
+Copyright and Related Rights in the Work or (ii) assert any associated claims
+and causes of action with respect to the Work, in either case contrary to
+Affirmer's express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+
+ b. Affirmer offers the Work as-is and makes no representations or warranties
+ of any kind concerning the Work, express, implied, statutory or otherwise,
+ including without limitation warranties of title, merchantability, fitness
+ for a particular purpose, non infringement, or the absence of latent or
+ other defects, accuracy, or the present or absence of errors, whether or not
+ discoverable, all to the greatest extent permissible under applicable law.
+
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without limitation
+ any person's Copyright and Related Rights in the Work. Further, Affirmer
+ disclaims responsibility for obtaining any necessary consents, permissions
+ or other rights required for any use of the Work.
+
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to this
+ CC0 or use of the Work.
+
+For more information, please see
+<http://creativecommons.org/publicdomain/zero/1.0/>
diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp
index 737033261e..4c62f6b160 100644
--- a/src/corelib/tools/qhash.cpp
+++ b/src/corelib/tools/qhash.cpp
@@ -45,6 +45,7 @@
#define _CRT_RAND_S
#endif
#include <stdlib.h>
+#include <stdint.h>
#include "qhash.h"
@@ -70,11 +71,17 @@
QT_BEGIN_NAMESPACE
+// We assume that pointers and size_t have the same size. If that assumption fails
+// on a platform, the code selecting between the methods below needs to be fixed.
+static_assert(sizeof(size_t) == QT_POINTER_SIZE, "size_t and pointers have different size.");
+
/*
* Hashing for memory segments is based on the public domain MurmurHash2 by
* Austin Appleby. See http://murmurhash.googlepages.com/
*/
-static inline uint hash(const void *key, uint len, uint seed) noexcept
+#if QT_POINTER_SIZE == 4
+
+static inline uint murmurhash(const void *key, uint len, uint seed) noexcept
{
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
@@ -130,7 +137,9 @@ static inline uint hash(const void *key, uint len, uint seed) noexcept
return h;
}
-static inline uint64_t hash(const void *key, uint64_t len, uint64_t seed) noexcept
+#else
+
+static inline uint64_t murmurhash(const void *key, uint64_t len, uint64_t seed) noexcept
{
const uint64_t m = 0xc6a4a7935bd1e995ULL;
const int r = 47;
@@ -176,14 +185,214 @@ static inline uint64_t hash(const void *key, uint64_t len, uint64_t seed) noexce
return h;
}
+#endif
+
+#if QT_POINTER_SIZE == 8
+// This is an inlined version of the SipHash implementation that is
+// trying to avoid some memcpy's from uint64 to uint8[] and back.
+//
+// The original algorithm uses a 128bit seed. Our public API only allows
+// for a 64bit seed, so we mix in the length of the string to get some more
+// bits for the seed.
+//
+// Use SipHash-1-2, which has similar performance characteristics to
+// the old DJB33XA algorithm, instead of the SipHash-2-4 default.
+#define cROUNDS 1 // SIPROUND iterations run after each absorbed message word
+#define dROUNDS 2 // SIPROUND iterations run once at the end, before the state is folded
+
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) // rotate the 64-bit value x left by b bits
+
+#define SIPROUND \
+ do { \
+ v0 += v1; \
+ v1 = ROTL(v1, 13); \
+ v1 ^= v0; \
+ v0 = ROTL(v0, 32); \
+ v2 += v3; \
+ v3 = ROTL(v3, 16); \
+ v3 ^= v2; \
+ v0 += v3; \
+ v3 = ROTL(v3, 21); \
+ v3 ^= v0; \
+ v2 += v1; \
+ v1 = ROTL(v1, 17); \
+ v1 ^= v2; \
+ v2 = ROTL(v2, 32); \
+ } while (0) // do/while (0) wrapper so that "SIPROUND;" expands to exactly one statement; mixes locals v0..v3 in place
+
+
+static uint64_t siphash(const uint8_t *in, uint64_t inlen, const uint64_t seed) // hashes the inlen bytes at in, keyed by seed
+{
+ /* Initial state: the ASCII bytes of "somepseudorandomlygeneratedbytes", eight per word */
+ uint64_t v0 = 0x736f6d6570736575ULL;
+ uint64_t v1 = 0x646f72616e646f6dULL;
+ uint64_t v2 = 0x6c7967656e657261ULL;
+ uint64_t v3 = 0x7465646279746573ULL;
+ uint64_t b;
+ uint64_t k0 = seed;
+ uint64_t k1 = seed ^ inlen; // second key word: the 64-bit seed mixed with the input length
+ int i;
+ const uint8_t *end = in + (inlen & ~7ULL); // one past the last complete 8-byte word
+ const int left = inlen & 7; // number of trailing bytes that do not fill a word
+ b = inlen << 56; // low byte of the length becomes the top byte of the final word
+ v3 ^= k1;
+ v2 ^= k0;
+ v1 ^= k1;
+ v0 ^= k0;
+
+ for (; in != end; in += 8) { // absorb the input one whole 8-byte word at a time
+ uint64_t m = qFromUnaligned<uint64_t>(in); // qFromUnaligned is defined elsewhere; presumably an alignment-safe load — confirm
+ v3 ^= m;
+
+ for (i = 0; i < cROUNDS; ++i)
+ SIPROUND;
+
+ v0 ^= m;
+ }
+
+
+#if defined(Q_CC_GNU) && Q_CC_GNU >= 700
+ QT_WARNING_DISABLE_GCC("-Wimplicit-fallthrough") // NOTE(review): no QT_WARNING_PUSH/POP is visible around this — confirm the suppression is meant to persist
+#endif
+ switch (left) { // cases fall through on purpose: each one ORs one more tail byte into b
+ case 7:
+ b |= ((uint64_t)in[6]) << 48;
+ case 6:
+ b |= ((uint64_t)in[5]) << 40;
+ case 5:
+ b |= ((uint64_t)in[4]) << 32;
+ case 4:
+ b |= ((uint64_t)in[3]) << 24;
+ case 3:
+ b |= ((uint64_t)in[2]) << 16;
+ case 2:
+ b |= ((uint64_t)in[1]) << 8;
+ case 1:
+ b |= ((uint64_t)in[0]);
+ break;
+ case 0:
+ break;
+ }
+
+ v3 ^= b; // absorb the final word (tail bytes plus length byte) like any other word
+
+ for (i = 0; i < cROUNDS; ++i)
+ SIPROUND;
+
+ v0 ^= b;
+
+ v2 ^= 0xff; // perturb v2 before the closing dROUNDS iterations
+
+ for (i = 0; i < dROUNDS; ++i)
+ SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3; // fold the four state words into the 64-bit result
+ return b;
+}
+#else
+// This is a "SipHash" implementation adapted for 32bit platforms. It performs
+// basically the same operations as the 64bit version using 4 bytes at a time
+// instead of 8.
+//
+// To make this work, we also need to change the constants for the mixing
+// rotations in ROTL. We're simply using half of the 64bit constants, rounded up
+// for odd numbers.
+//
+// For the v0-v3 constants, simply use the first four bytes of the 64 bit versions.
+//
+// Use SipHash-1-2, which has similar performance characteristics to
+// the old DJB33XA algorithm, instead of the SipHash-2-4 default.
+#define cROUNDS 1 // SIPROUND iterations run after each absorbed message word
+#define dROUNDS 2 // SIPROUND iterations run once at the end, before the state is folded
+
+#define ROTL(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b)))) // rotate the 32-bit value x left by b bits
+
+#define SIPROUND \
+ do { \
+ v0 += v1; \
+ v1 = ROTL(v1, 7); \
+ v1 ^= v0; \
+ v0 = ROTL(v0, 16); \
+ v2 += v3; \
+ v3 = ROTL(v3, 8); \
+ v3 ^= v2; \
+ v0 += v3; \
+ v3 = ROTL(v3, 11); \
+ v3 ^= v0; \
+ v2 += v1; \
+ v1 = ROTL(v1, 9); \
+ v1 ^= v2; \
+ v2 = ROTL(v2, 16); \
+ } while (0) // do/while (0) wrapper so that "SIPROUND;" expands to exactly one statement; mixes locals v0..v3 in place
+
+
+static uint siphash(const uint8_t *in, uint inlen, const uint seed) // hashes the inlen bytes at in, keyed by seed (32-bit variant)
+{
+ /* Initial state: the first four bytes of each 64-bit constant ("some", "dora", "lyge", "tedb") */
+ uint v0 = 0x736f6d65U;
+ uint v1 = 0x646f7261U;
+ uint v2 = 0x6c796765U;
+ uint v3 = 0x74656462U;
+ uint b;
+ uint k0 = seed;
+ uint k1 = seed ^ inlen; // second key word: the seed mixed with the input length
+ int i;
+ const uint8_t *end = in + (inlen & ~3ULL); // one past the last complete 4-byte word
+ const int left = inlen & 3; // number of trailing bytes that do not fill a word
+ b = inlen << 24; // low byte of the length becomes the top byte of the final word
+ v3 ^= k1;
+ v2 ^= k0;
+ v1 ^= k1;
+ v0 ^= k0;
+
+ for (; in != end; in += 4) { // absorb the input one whole 4-byte word at a time
+ uint m = qFromUnaligned<uint>(in); // qFromUnaligned is defined elsewhere; presumably an alignment-safe load — confirm
+ v3 ^= m;
+
+ for (i = 0; i < cROUNDS; ++i)
+ SIPROUND;
+
+ v0 ^= m;
+ }
+
+#if defined(Q_CC_GNU) && Q_CC_GNU >= 700
+ QT_WARNING_DISABLE_GCC("-Wimplicit-fallthrough") // NOTE(review): no QT_WARNING_PUSH/POP is visible around this — confirm the suppression is meant to persist
+#endif
+ switch (left) { // cases fall through on purpose: each one ORs one more tail byte into b
+ case 3:
+ b |= ((uint)in[2]) << 16;
+ case 2:
+ b |= ((uint)in[1]) << 8;
+ case 1:
+ b |= ((uint)in[0]);
+ break;
+ case 0:
+ break;
+ }
+
+ v3 ^= b; // absorb the final word (tail bytes plus length byte) like any other word
+
+ for (i = 0; i < cROUNDS; ++i)
+ SIPROUND;
+
+ v0 ^= b;
+
+ v2 ^= 0xff; // perturb v2 before the closing dROUNDS iterations
+
+ for (i = 0; i < dROUNDS; ++i)
+ SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3; // fold the four state words into the 32-bit result
+ return b;
+}
+#endif
+
size_t qHashBits(const void *p, size_t size, size_t seed) noexcept // hashes the size bytes at p with the given seed
{
- size_t result;
- if constexpr (sizeof(size_t) == 8)
- result = hash(p, uint64_t(size), uint64_t(seed));
- else
- result = hash(p, uint(size), uint(seed));
- return result;
+ if (size <= QT_POINTER_SIZE) // keys no longer than a pointer keep using murmurhash
+ return murmurhash(p, size, seed);
+
+ return siphash(reinterpret_cast<const uchar *>(p), size, seed); // everything longer goes through siphash
}
size_t qHash(const QByteArray &key, size_t seed) noexcept
@@ -569,7 +778,7 @@ size_t qHash(float key, size_t seed) noexcept
{
// ensure -0 gets mapped to 0
key += 0.0f;
- return qHashBits(&key, sizeof(key), seed);
+ return murmurhash(&key, sizeof(key), seed);
}
/*! \relates QHash
@@ -581,7 +790,7 @@ size_t qHash(double key, size_t seed) noexcept
{
// ensure -0 gets mapped to 0
key += 0.0;
- return qHashBits(&key, sizeof(key), seed);
+ return murmurhash(&key, sizeof(key), seed);
}
#if !defined(Q_OS_DARWIN) || defined(Q_CLANG_QDOC)
@@ -594,7 +803,7 @@ size_t qHash(long double key, size_t seed) noexcept
{
// ensure -0 gets mapped to 0
key += static_cast<long double>(0.0);
- return qHashBits(&key, sizeof(key), seed);
+ return murmurhash(&key, sizeof(key), seed);
}
#endif
diff --git a/src/corelib/tools/qt_attribution.json b/src/corelib/tools/qt_attribution.json
new file mode 100644
index 0000000000..928ff537ca
--- /dev/null
+++ b/src/corelib/tools/qt_attribution.json
@@ -0,0 +1,15 @@
+{
+ "Id": "siphash",
+ "Name": "SipHash Algorithm",
+ "QDocModule": "qtcore",
+ "QtUsage": "Used in Qt Core (QHash)",
+
+ "Description": "Implements the SipHash algorithm.",
+ "Homepage": "https://131002.net/siphash/",
+ "DownloadLocation": "https://raw.githubusercontent.com/veorq/SipHash/adcbf09b1684a718f594faa650ffc56bacdb0777/siphash24.c",
+
+ "License": "Creative Commons Zero v1.0 Universal",
+ "LicenseId": "CC0-1.0",
+ "LicenseFile": "LICENSE.siphash",
+ "Copyright": "(C) 2012-2014 Jean-Philippe Aumasson, (C) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>"
+}