diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2013-12-13 13:21:09 -0800 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2014-01-31 21:51:25 +0100 |
commit | 4c9db3989633119756af6fcb61eac5d01d364892 (patch) | |
tree | 3a4e7413c81171d9b42ed3caf0f33df5ae559e48 /src/corelib/tools | |
parent | 3bae7b0a2833c5de9e9b66a7068d2fd9cccabb79 (diff) |
Improve the QString/QLatin1String ucstrncmp with SSE2
Change-Id: I6a7ce281096bb99352132f775ae32fcaf55bd32c
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Diffstat (limited to 'src/corelib/tools')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 51 |
1 file changed, 51 insertions, 0 deletions
```diff
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index a2ea3bfdfa..a3b0f26044 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -312,6 +312,57 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l)
     const ushort *uc = reinterpret_cast<const ushort *>(a);
     const ushort *e = uc + l;
 
+#ifdef __SSE2__
+    __m128i nullmask = _mm_setzero_si128();
+    qptrdiff offset = 0;
+
+    // we're going to read uc[offset..offset+15] (32 bytes)
+    // and c[offset..offset+15] (16 bytes)
+    for ( ; uc + offset + 15 < e; offset += 16) {
+        // similar to fromLatin1_helper:
+        // load Latin 1 data and expand to UTF-16
+        __m128i chunk = _mm_loadu_si128((__m128i*)(c + offset));
+        __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullmask);
+        __m128i secondHalf = _mm_unpackhi_epi8(chunk, nullmask);
+
+        // load UTF-16 data and compare
+        __m128i ucdata1 = _mm_loadu_si128((__m128i*)(uc + offset));
+        __m128i ucdata2 = _mm_loadu_si128((__m128i*)(uc + offset + 8));
+        __m128i result1 = _mm_cmpeq_epi16(firstHalf, ucdata1);
+        __m128i result2 = _mm_cmpeq_epi16(secondHalf, ucdata2);
+
+        uint mask = ~(_mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16);
+        if (mask) {
+            // found a different character
+            uint idx = uint(_bit_scan_forward(mask));
+            return uc[offset + idx / 2] - c[offset + idx / 2];
+        }
+    }
+
+    // we'll read uc[offset..offset+7] (16 bytes) and c[offset-8..offset+7] (16 bytes)
+    if (uc + offset + 7 < e) {
+        // same, but we'll throw away half the data
+        __m128i chunk = _mm_loadu_si128((__m128i*)(c + offset - 8));
+        __m128i secondHalf = _mm_unpackhi_epi8(chunk, nullmask);
+
+        __m128i ucdata = _mm_loadu_si128((__m128i*)(uc + offset));
+        __m128i result = _mm_cmpeq_epi16(secondHalf, ucdata);
+        uint mask = ~_mm_movemask_epi8(result);
+        if (ushort(mask)) {
+            // found a different character
+            uint idx = uint(_bit_scan_forward(mask));
+            return uc[offset + idx / 2] - c[offset + idx / 2];
+        }
+
+        // still matched
+        offset += 8;
+    }
+
+    // reset uc and c
+    uc += offset;
+    c += offset;
+#endif
+
     while (uc < e) {
         int diff = *uc - *c;
         if (diff)
```