From ed991b7d15c097bb5e9f20f6d9875b4f82df1d12 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 28 Oct 2016 22:23:36 -0700 Subject: Add an AES-based qHash function, inspired on Go's Change-Id: I09100678ff4443e6be06fffd1481e94089c47799 Reviewed-by: Lars Knoll --- src/corelib/tools/qhash.cpp | 137 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) (limited to 'src') diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp index ff2ae6813c..91e5147780 100644 --- a/src/corelib/tools/qhash.cpp +++ b/src/corelib/tools/qhash.cpp @@ -387,8 +387,145 @@ static uint siphash(const uint8_t *in, uint inlen, const uint seed) } #endif + +#if QT_COMPILER_SUPPORTS_HERE(AES) && QT_COMPILER_SUPPORTS_HERE(SSE4_2) && \ + !(defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__)) +# define AESHASH + +QT_FUNCTION_TARGET(AES) +static size_t aeshash(const uchar *p, size_t len, size_t seed) Q_DECL_NOTHROW +{ + __m128i key; + if (sizeof(size_t) == 8) { +#ifdef Q_PROCESSOR_X86_64 + quint64 seededlen = seed ^ len; + __m128i mseed = _mm_cvtsi64_si128(seed); + key = _mm_insert_epi64(mseed, seededlen, 1); +#endif + } else { + quint32 replicated_len = quint16(len) | (quint32(quint16(len)) << 16); + __m128i mseed = _mm_cvtsi32_si128(seed); + key = _mm_insert_epi32(mseed, replicated_len, 1); + key = _mm_unpacklo_epi64(key, key); + } + + // This is inspired by the algorithm in the Go language. See: + // https://github.com/golang/go/blob/894abb5f680c040777f17f9f8ee5a5ab3a03cb94/src/runtime/asm_386.s#L902 + // https://github.com/golang/go/blob/894abb5f680c040777f17f9f8ee5a5ab3a03cb94/src/runtime/asm_amd64.s#L903 + // + // Even though we're using the AESENC instruction from the CPU, this code + // is not encryption and this routine makes no claim to be + // cryptographically secure. We're simply using the instruction that performs + // the scrambling round (step 3 in [1]) because it's just very good at + // spreading the bits around. + // + // [1] https://en.wikipedia.org/wiki/Advanced_Encryption_Standard#High-level_description_of_the_algorithm + + // hash 16 bytes, running 3 scramble rounds of AES on itself (like label "final1") + const auto hash16bytes = [](__m128i &state0, __m128i data) QT_FUNCTION_TARGET(AES) { + state0 = _mm_xor_si128(state0, data); + state0 = _mm_aesenc_si128(state0, state0); + state0 = _mm_aesenc_si128(state0, state0); + state0 = _mm_aesenc_si128(state0, state0); + }; + + __m128i state0 = key; + auto src = reinterpret_cast(p); + + if (len < 16) + goto lt16; + if (len < 32) + goto lt32; + + // rounds of 32 bytes + { + // Make state1 = ~state0: + __m128i one = _mm_cmpeq_epi64(key, key); + __m128i state1 = _mm_xor_si128(state0, one); + + // do simplified rounds of 32 bytes: unlike the Go code, we only + // scramble twice and we keep 256 bits of state + const auto srcend = src + (len / 32); + while (src < srcend) { + __m128i data0 = _mm_loadu_si128(src); + __m128i data1 = _mm_loadu_si128(src + 1); + data0 = _mm_xor_si128(data0, state0); + data1 = _mm_xor_si128(data1, state1); + state0 = _mm_aesenc_si128(state0, state0); + state1 = _mm_aesenc_si128(state1, state1); + state0 = _mm_aesenc_si128(state0, state0); + state1 = _mm_aesenc_si128(state1, state1); + src += 2; + } + state0 = _mm_xor_si128(state0, state1); + } + len &= 0x1f; + + // do we still have 16 or more bytes? + if (len & 0x10) { +lt32: + __m128i data = _mm_loadu_si128(src); + hash16bytes(state0, data); + ++src; + } + len &= 0xf; + +lt16: + if (len) { + // load the last chunk of data + // We're going to load 16 bytes and mask zero the part we don't care + // (the hash of a short string is different from the hash of a longer + // including NULLs at the end because the length is in the key) + // WARNING: this may produce valgrind warnings, but it's safe + + __m128i data; + + if (Q_LIKELY(quintptr(src + 1) & 0xff0)) { + // same page, we definitely can't fault: + // load all 16 bytes and mask off the bytes past the end of the source + static const qint8 maskarray[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + __m128i mask = _mm_loadu_si128(reinterpret_cast(maskarray + 15 - len)); + data = _mm_loadu_si128(src); + data = _mm_and_si128(data, mask); + } else { + // too close to the end of the page, it could fault: + // load 16 bytes ending at the data end, then shuffle them to the beginning + static const qint8 shufflecontrol[] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }; + __m128i control = _mm_loadu_si128(reinterpret_cast(shufflecontrol + 15 - len)); + p = reinterpret_cast(src - 1); + data = _mm_loadu_si128(reinterpret_cast(p + len)); + data = _mm_shuffle_epi8(data, control); + } + + hash16bytes(state0, data); + } + + // extract state0 +# if QT_POINTER_SIZE == 8 + return _mm_cvtsi128_si64(state0); +# else + return _mm_cvtsi128_si32(state0); +# endif +} +#endif + size_t qHashBits(const void *p, size_t size, size_t seed) noexcept { +#ifdef QT_BOOTSTRAPPED + // the seed is always 0 in bootstrapped mode (no seed generation code), + // so help the compiler do dead code elimination + seed = 0; +#endif +#ifdef AESHASH + if (seed && qCpuHasFeature(AES) && qCpuHasFeature(SSE4_2)) + return aeshash(reinterpret_cast(p), size, seed); +#endif if (size <= QT_POINTER_SIZE) return murmurhash(p, size, seed); -- cgit v1.2.3