diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2018-05-13 09:14:09 -0700 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2018-05-15 23:07:11 +0000 |
commit | 1e95a07a5ced774b20adb66b34c31bdfaf566bdc (patch) | |
tree | 777cc63d356b9347c7a40b10173fff16b98f2a0a /src/corelib/codecs/qutfcodec.cpp | |
parent | 40ccf9818829d4b7df0e4e93a84a25cd75a3f678 (diff) |
QString: insert a number of 8-character SIMD loops
We don't have _mm_cvtsi64_si128() (the REX.W expansion of MOVD [0F 6E]),
but we do have _mm_loadl_epi64(), the SSE2 expansion of the MMX MOVQ at
opcode 0F 7E. Ditto for _mm_cvtsi128_si64() and _mm_storel_epi64(). And
those work even in 32-bit mode. By doing this, we can reduce the tail
unrolled loops by half, reducing code size.
I'm not adding these new SIMD sections to -Os builds.
Change-Id: Ib48364abee9f464c96c6fffd152e405310ef67be
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/corelib/codecs/qutfcodec.cpp')
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 39 |
1 file changed, 39 insertions, 0 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index 08cc99f4dc..96be45ff4e 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -102,6 +102,26 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const return false; } } + + if (end - src >= 8) { + // do eight characters at a time + __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src)); + __m128i packed = _mm_packus_epi16(data, data); + __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128()); + + // store even non-ASCII + _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), packed); + + uchar n = ~_mm_movemask_epi8(nonAscii); + if (n) { + nextAscii = src + qBitScanReverse(n) + 1; + n = qCountTrailingZeroBits(n); + dst += n; + src += n; + return false; + } + } + return src == end; } @@ -150,6 +170,25 @@ static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const return false; } + + if (end - src >= 8) { + __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src)); + uint n = _mm_movemask_epi8(data) & 0xff; + if (!n) { + // unpack and store + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), _mm_unpacklo_epi8(data, _mm_setzero_si128())); + } else { + while (!(n & 1)) { + *dst++ = *src++; + n >>= 1; + } + + n = qBitScanReverse(n); + nextAscii = src + n + 1; + return false; + } + } + return src == end; } |