diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2017-08-20 15:31:03 -0700 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2017-09-26 18:19:35 +0000 |
commit | f7350ac99ed961ec2f08d1db55b3c3ce088f33f0 (patch) | |
tree | 26a35956ab9de6742c2ee966c9f327bfe01984f6 /src/corelib | |
parent | 49154acde3c2c5f45a50dfd5d011c47db8b761f9 (diff) |
Improve qustrlen further by avoiding the alignment prologue
This avoids looping prior to the main 16-byte loop, by performing one
aligned load that may include bytes prior to the start of the string. This is
guaranteed not to fault, since str points to a valid character and a 16-byte
aligned load cannot cross a page boundary (so it stays within the page that
holds str), but it may cause Valgrind to print warnings.
Change-Id: I6e9274c1e7444ad48c81fffd14dcae854bba24b2
Reviewed-by: Olivier Goffart (Woboq GmbH) <ogoffart@woboq.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 29 |
1 files changed, 19 insertions, 10 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index a0c309c1cf..57a43205b7 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -165,28 +165,37 @@ qssize_t qustrlen(const ushort *str) Q_DECL_NOTHROW qssize_t result = 0; #ifdef __SSE2__ - // progress until we get an aligned pointer - const ushort *ptr = str; - while (*ptr && quintptr(ptr) % 16) - ++ptr; - if (*ptr == 0) - return ptr - str; + // find the 16-byte alignment immediately prior or equal to str + quintptr misalignment = quintptr(str) & 0xf; + Q_ASSERT((misalignment & 1) == 0); + const ushort *ptr = str - (misalignment / 2); // load 16 bytes and see if we have a null // (aligned loads can never segfault) - int mask; const __m128i zeroes = _mm_setzero_si128(); + __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr)); + __m128i comparison = _mm_cmpeq_epi16(data, zeroes); + quint32 mask = _mm_movemask_epi8(comparison); + + // ignore the result prior to the beginning of str + mask >>= misalignment; + + // Have we found something in the first block? Need to handle it now + // because of the left shift above. + if (mask) + return qCountTrailingZeroBits(quint32(mask)) / 2; + do { - __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr)); ptr += 8; + data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr)); - __m128i comparison = _mm_cmpeq_epi16(data, zeroes); + comparison = _mm_cmpeq_epi16(data, zeroes); mask = _mm_movemask_epi8(comparison); } while (mask == 0); // found a null uint idx = qCountTrailingZeroBits(quint32(mask)); - return ptr - str - 8 + idx / 2; + return ptr - str + idx / 2; #endif if (sizeof(wchar_t) == sizeof(ushort)) |