summaryrefslogtreecommitdiffstats
path: root/src/corelib/codecs
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2018-05-13 09:14:09 -0700
committerThiago Macieira <thiago.macieira@intel.com>2018-05-15 23:07:11 +0000
commit1e95a07a5ced774b20adb66b34c31bdfaf566bdc (patch)
tree777cc63d356b9347c7a40b10173fff16b98f2a0a /src/corelib/codecs
parent40ccf9818829d4b7df0e4e93a84a25cd75a3f678 (diff)
QString: insert a number of 8-character SIMD loops
We don't have _mm_cvtsi64_si128() (the REX.W expansion of MOVD [0F 6E]), but we do have _mm_loadl_epi64(), the SSE2 expansion of the MMX MOVQ at opcode 0F 7E. Ditto for _mm_cvtsi128_si64() and _mm_storel_epi64(). And those work even in 32-bit mode. By doing this, we can reduce the tail unrolled loops by half, reducing code size. I'm not adding these new SIMD sections to -Os builds. Change-Id: Ib48364abee9f464c96c6fffd152e405310ef67be Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r--src/corelib/codecs/qutfcodec.cpp39
1 files changed, 39 insertions, 0 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 08cc99f4dc..96be45ff4e 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -102,6 +102,26 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const
return false;
}
}
+
+ if (end - src >= 8) {
+ // do eight characters at a time
+ __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
+ __m128i packed = _mm_packus_epi16(data, data);
+ __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128());
+
+ // store even non-ASCII
+ _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), packed);
+
+ uchar n = ~_mm_movemask_epi8(nonAscii);
+ if (n) {
+ nextAscii = src + qBitScanReverse(n) + 1;
+ n = qCountTrailingZeroBits(n);
+ dst += n;
+ src += n;
+ return false;
+ }
+ }
+
return src == end;
}
@@ -150,6 +170,25 @@ static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const
return false;
}
+
+ if (end - src >= 8) {
+ __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src));
+ uint n = _mm_movemask_epi8(data) & 0xff;
+ if (!n) {
+ // unpack and store
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), _mm_unpacklo_epi8(data, _mm_setzero_si128()));
+ } else {
+ while (!(n & 1)) {
+ *dst++ = *src++;
+ n >>= 1;
+ }
+
+ n = qBitScanReverse(n);
+ nextAscii = src + n + 1;
+ return false;
+ }
+ }
+
return src == end;
}