diff options
-rw-r--r-- | src/corelib/json/qjson_p.h | 2 | ||||
-rw-r--r-- | src/corelib/tools/qhash.cpp | 8 | ||||
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 22 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 37 | ||||
-rw-r--r-- | src/corelib/tools/qstring.cpp | 2 |
5 files changed, 65 insertions, 6 deletions
diff --git a/src/corelib/json/qjson_p.h b/src/corelib/json/qjson_p.h
index 0b3f517990..59d0c91785 100644
--- a/src/corelib/json/qjson_p.h
+++ b/src/corelib/json/qjson_p.h
@@ -402,7 +402,7 @@ public:
             // pack with itself, we'll discard the high part anyway
             chunk = _mm_packus_epi16(chunk, chunk);
             // unaligned 64-bit store
-            *(quint64*)&l[i] = _mm_cvtsi128_si64(chunk);
+            qUnalignedStore(l + i, _mm_cvtsi128_si64(chunk));
             i += 8;
         }
 # endif
diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp
index 775e1364a1..ac9e08de8b 100644
--- a/src/corelib/tools/qhash.cpp
+++ b/src/corelib/tools/qhash.cpp
@@ -106,24 +106,24 @@ static uint crc32(const Char *ptr, size_t len, uint h)
 
     p += 8;
     for ( ; p <= e; p += 8)
-        h2 = _mm_crc32_u64(h2, *reinterpret_cast<const qlonglong *>(p - 8));
+        h2 = _mm_crc32_u64(h2, qUnalignedLoad<qlonglong>(p - 8));
     h = h2;
     p -= 8;
 
     len = e - p;
     if (len & 4) {
-        h = _mm_crc32_u32(h, *reinterpret_cast<const uint *>(p));
+        h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p));
         p += 4;
     }
 # else
     p += 4;
     for ( ; p <= e; p += 4)
-        h = _mm_crc32_u32(h, *reinterpret_cast<const uint *>(p - 4));
+        h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p - 4));
     p -= 4;
     len = e - p;
 # endif
     if (len & 2) {
-        h = _mm_crc32_u16(h, *reinterpret_cast<const ushort *>(p));
+        h = _mm_crc32_u16(h, qUnalignedLoad<ushort>(p));
         p += 2;
     }
     if (sizeof(Char) == 1 && len & 1)
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index f07eb098f2..5ca2ce4c6f 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -716,4 +716,26 @@ void qDumpCPUFeatures()
     puts("");
 }
 
+/*!
+    \internal
+    \fn T qUnalignedLoad(const void *ptr)
+    \since 5.6.1
+
+    Loads a \c{T} from address \a ptr, which may be misaligned.
+
+    Use of this function avoids the undefined behavior that the C++ standard
+    otherwise attributes to unaligned loads.
+*/
+
+/*!
+    \internal
+    \fn void qUnalignedStore(void *ptr, T t)
+    \since 5.6.1
+
+    Stores \a t to address \a ptr, which may be misaligned.
+
+    Use of this function avoids the undefined behavior that the C++ standard
+    otherwise attributes to unaligned stores.
+*/
+
 QT_END_NAMESPACE
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 12a329f36c..8171184ad2 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -470,6 +470,43 @@ unsigned _bit_scan_forward(unsigned val)
 #define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
     for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)
 
+// these defines are copied from qendian.h
+// in Qt 5.7, they have been moved to qglobal.h
+// drop them when merging this to 5.7
+#ifdef __has_builtin
+# define QT_HAS_BUILTIN(x) __has_builtin(x)
+#else
+# define QT_HAS_BUILTIN(x) 0
+#endif
+
+template <typename T>
+Q_ALWAYS_INLINE
+T qUnalignedLoad(const void *ptr) Q_DECL_NOTHROW
+{
+    T result;
+#if QT_HAS_BUILTIN(__builtin_memcpy)
+    __builtin_memcpy
+#else
+    memcpy
+#endif
+    /*memcpy*/(&result, ptr, sizeof result);
+    return result;
+}
+
+template <typename T>
+Q_ALWAYS_INLINE
+void qUnalignedStore(void *ptr, T t) Q_DECL_NOTHROW
+{
+#if QT_HAS_BUILTIN(__builtin_memcpy)
+    __builtin_memcpy
+#else
+    memcpy
+#endif
+    /*memcpy*/(ptr, &t, sizeof t);
+}
+
+#undef QT_HAS_BUILTIN
+
 QT_END_NAMESPACE
 
 #endif // QSIMD_P_H
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 39ec66c7f1..9924b606c5 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -577,7 +577,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
         // we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes)
         if (uc + offset + 7 < e) {
             // same, but we're using an 8-byte load
-            __m128i chunk = _mm_cvtsi64_si128(*(const long long *)(c + offset));
+            __m128i chunk = _mm_cvtsi64_si128(qUnalignedLoad<long long>(c + offset));
             __m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask);
 
             __m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset));