diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2018-05-14 23:23:24 -0700 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-06-27 16:29:14 +0000 |
commit | 10e5ec25576eea821f2c767e62d00f80e9a01c94 (patch) | |
tree | af1962f2e25dfdec81b3cc83d4f51ba3d57ad72c /src/corelib/tools/qbytearray.cpp | |
parent | cd64a96b31f57e522ab8d29c8357acf384012ebe (diff) |
QByteArray: implement qstricmp with SSE 4.1
Using SSE 4.1 because of the need for PMINUB.
Change-Id: Ib48364abee9f464c96c6fffd152ebd3f8ea7fe94
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/corelib/tools/qbytearray.cpp')
-rw-r--r-- | src/corelib/tools/qbytearray.cpp | 75 |
1 files changed, 67 insertions, 8 deletions
diff --git a/src/corelib/tools/qbytearray.cpp b/src/corelib/tools/qbytearray.cpp index 4d2003334c..651f260f0e 100644 --- a/src/corelib/tools/qbytearray.cpp +++ b/src/corelib/tools/qbytearray.cpp @@ -47,6 +47,7 @@ #include "qlocale_p.h" #include "qlocale_tools_p.h" #include "private/qnumeric_p.h" +#include "private/qsimd_p.h" #include "qstringalgorithms_p.h" #include "qscopedpointer.h" #include "qbytearray_p.h" @@ -410,14 +411,72 @@ int qstricmp(const char *str1, const char *str2) { const uchar *s1 = reinterpret_cast<const uchar *>(str1); const uchar *s2 = reinterpret_cast<const uchar *>(str2); - int res; - uchar c; - if (!s1 || !s2) - return s1 ? 1 : (s2 ? -1 : 0); - for (; !(res = (c = latin1_lowercased[*s1]) - latin1_lowercased[*s2]); s1++, s2++) - if (!c) // strings are equal - break; - return res; + if (!s1) + return s2 ? -1 : 0; + if (!s2) + return 1; + + enum { Incomplete = 256 }; + qptrdiff offset = 0; + auto innerCompare = [=, &offset](qptrdiff max, bool unlimited) { + max += offset; + do { + uchar c = latin1_lowercased[s1[offset]]; + int res = c - latin1_lowercased[s2[offset]]; + if (Q_UNLIKELY(res)) + return res; + if (Q_UNLIKELY(!c)) + return 0; + ++offset; + } while (unlimited || offset < max); + return int(Incomplete); + }; + +#ifdef __SSE4_1__ + enum { PageSize = 4096, PageMask = PageSize - 1 }; + const __m128i zero = _mm_setzero_si128(); + forever { + // Calculate how many bytes we can load until we cross a page boundary + // for either source. This isn't an exact calculation, just something + // very quick. 
+ quintptr u1 = quintptr(s1 + offset); + quintptr u2 = quintptr(s2 + offset); + uint n = PageSize - ((u1 | u2) & PageMask); + + qptrdiff maxoffset = offset + n; + for ( ; offset + 16 <= maxoffset; offset += sizeof(__m128i)) { + // load 16 bytes from either source + __m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s1 + offset)); + __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s2 + offset)); + + // compare the two against each other + __m128i cmp = _mm_cmpeq_epi8(a, b); + + // find NUL terminators too + cmp = _mm_min_epu8(cmp, a); + cmp = _mm_cmpeq_epi8(cmp, zero); + + // was there any difference or a NUL? + uint mask = _mm_movemask_epi8(cmp); + if (mask) { + // yes, find out where + uint start = qCountTrailingZeroBits(mask); + uint end = sizeof(mask) * 8 - qCountLeadingZeroBits(mask); + Q_ASSUME(end >= start); + offset += start; + n = end - start; + break; + } + } + + // using SIMD could cause a page fault, so iterate byte by byte + int res = innerCompare(n, false); + if (res != Incomplete) + return res; + } +#endif + + return innerCompare(-1, true); } /*! \relates QByteArray |