From 570ef11c28b885817a69523835fac40d9e0d1f4e Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Wed, 12 Sep 2018 18:10:11 -0700 Subject: QUtf8Codec: Use one 32-byte load instead of two 16-byte ones on AVX2 The number of instructions is the same. But if the CPU can issue 32-byte-wide loads, this will be faster. For CPUs that would do two 16-byte loads, this is no worse than current code. Change-Id: I8f261579aad648fdb4f0fffd1553d060b4fc852f Reviewed-by: Allan Sandfeld Jensen --- src/corelib/codecs/qutfcodec.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index 26c68cdee5..8bc1294c49 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -70,9 +70,14 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const { // do sixteen characters at a time for ( ; end - src >= 16; src += 16, dst += 16) { +# ifdef __AVX2__ + __m256i data = _mm256_loadu_si256(reinterpret_cast(src)); + __m128i data1 = _mm256_castsi256_si128(data); + __m128i data2 = _mm256_extracti128_si256(data, 1); +# else __m128i data1 = _mm_loadu_si128((const __m128i*)src); __m128i data2 = _mm_loadu_si128(1+(const __m128i*)src); - +# endif // check if everything is ASCII // the highest ASCII value is U+007F -- cgit v1.2.3