summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/tools
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@intel.com> 2013-12-13 13:21:09 -0800
committer: The Qt Project <gerrit-noreply@qt-project.org> 2014-01-31 21:51:25 +0100
commit: 4c9db3989633119756af6fcb61eac5d01d364892 (patch)
tree: 3a4e7413c81171d9b42ed3caf0f33df5ae559e48 /src/corelib/tools
parent: 3bae7b0a2833c5de9e9b66a7068d2fd9cccabb79 (diff)
Improve the QString/QLatin1String ucstrncmp with SSE2
Change-Id: I6a7ce281096bb99352132f775ae32fcaf55bd32c
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Diffstat (limited to 'src/corelib/tools')
-rw-r--r-- src/corelib/tools/qstring.cpp | 51
1 files changed, 51 insertions, 0 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index a2ea3bfdfa..a3b0f26044 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -312,6 +312,57 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
const ushort *uc = reinterpret_cast<const ushort *>(a);
const ushort *e = uc + l;
+#ifdef __SSE2__
+ __m128i nullmask = _mm_setzero_si128();
+ qptrdiff offset = 0;
+
+ // we're going to read uc[offset..offset+15] (32 bytes)
+ // and c[offset..offset+15] (16 bytes)
+ for ( ; uc + offset + 15 < e; offset += 16) {
+ // similar to fromLatin1_helper:
+ // load Latin 1 data and expand to UTF-16
+ __m128i chunk = _mm_loadu_si128((__m128i*)(c + offset));
+ __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullmask);
+ __m128i secondHalf = _mm_unpackhi_epi8(chunk, nullmask);
+
+ // load UTF-16 data and compare
+ __m128i ucdata1 = _mm_loadu_si128((__m128i*)(uc + offset));
+ __m128i ucdata2 = _mm_loadu_si128((__m128i*)(uc + offset + 8));
+ __m128i result1 = _mm_cmpeq_epi16(firstHalf, ucdata1);
+ __m128i result2 = _mm_cmpeq_epi16(secondHalf, ucdata2);
+
+ uint mask = ~(_mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16);
+ if (mask) {
+ // found a different character
+ uint idx = uint(_bit_scan_forward(mask));
+ return uc[offset + idx / 2] - c[offset + idx / 2];
+ }
+ }
+
+ // we'll read uc[offset..offset+7] (16 bytes) and c[offset-8..offset+7] (16 bytes)
+ if (uc + offset + 7 < e) {
+ // same, but we'll throw away half the data
+ __m128i chunk = _mm_loadu_si128((__m128i*)(c + offset - 8));
+ __m128i secondHalf = _mm_unpackhi_epi8(chunk, nullmask);
+
+ __m128i ucdata = _mm_loadu_si128((__m128i*)(uc + offset));
+ __m128i result = _mm_cmpeq_epi16(secondHalf, ucdata);
+ uint mask = ~_mm_movemask_epi8(result);
+ if (ushort(mask)) {
+ // found a different character
+ uint idx = uint(_bit_scan_forward(mask));
+ return uc[offset + idx / 2] - c[offset + idx / 2];
+ }
+
+ // still matched
+ offset += 8;
+ }
+
+ // reset uc and c
+ uc += offset;
+ c += offset;
+#endif
+
while (uc < e) {
int diff = *uc - *c;
if (diff)