summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
author Thiago Macieira <thiago.macieira@intel.com> 2017-08-20 15:31:03 -0700
committer Thiago Macieira <thiago.macieira@intel.com> 2017-09-26 18:19:35 +0000
commit f7350ac99ed961ec2f08d1db55b3c3ce088f33f0 (patch)
tree 26a35956ab9de6742c2ee966c9f327bfe01984f6 /src/corelib
parent 49154acde3c2c5f45a50dfd5d011c47db8b761f9 (diff)
Improve qustrlen further by avoiding the alignment prologue
This avoids looping prior to the main 16-byte loop by performing one aligned load that may include bytes prior to the start of the string. This is guaranteed not to fault: str points to a valid character, and an aligned 16-byte load never crosses a page boundary, so the whole load lies in the same page as str. It may, however, cause Valgrind to print warnings.
Change-Id: I6e9274c1e7444ad48c81fffd14dcae854bba24b2
Reviewed-by: Olivier Goffart (Woboq GmbH) <ogoffart@woboq.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib')
-rw-r--r--src/corelib/tools/qstring.cpp29
1 files changed, 19 insertions, 10 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index a0c309c1cf..57a43205b7 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -165,28 +165,37 @@ qssize_t qustrlen(const ushort *str) Q_DECL_NOTHROW
qssize_t result = 0;
#ifdef __SSE2__
- // progress until we get an aligned pointer
- const ushort *ptr = str;
- while (*ptr && quintptr(ptr) % 16)
- ++ptr;
- if (*ptr == 0)
- return ptr - str;
+ // find the 16-byte alignment immediately prior or equal to str
+ quintptr misalignment = quintptr(str) & 0xf;
+ Q_ASSERT((misalignment & 1) == 0);
+ const ushort *ptr = str - (misalignment / 2);
// load 16 bytes and see if we have a null
// (aligned loads can never segfault)
- int mask;
const __m128i zeroes = _mm_setzero_si128();
+ __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));
+ __m128i comparison = _mm_cmpeq_epi16(data, zeroes);
+ quint32 mask = _mm_movemask_epi8(comparison);
+
+ // ignore the result prior to the beginning of str
+ mask >>= misalignment;
+
+ // Have we found something in the first block? Need to handle it now,
+ // because the right shift above made the mask relative to str, not ptr.
+ if (mask)
+ return qCountTrailingZeroBits(quint32(mask)) / 2;
+
do {
- __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));
ptr += 8;
+ data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr));
- __m128i comparison = _mm_cmpeq_epi16(data, zeroes);
+ comparison = _mm_cmpeq_epi16(data, zeroes);
mask = _mm_movemask_epi8(comparison);
} while (mask == 0);
// found a null
uint idx = qCountTrailingZeroBits(quint32(mask));
- return ptr - str - 8 + idx / 2;
+ return ptr - str + idx / 2;
#endif
if (sizeof(wchar_t) == sizeof(ushort))