From f7350ac99ed961ec2f08d1db55b3c3ce088f33f0 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Sun, 20 Aug 2017 15:31:03 -0700 Subject: Improve qustrlen further by avoiding the alignment prologue This avoids looping prior to the main 16-byte loop, by performing one load that may include bytes prior to the start of the string. This is guaranteed not to fault, since str points to a valid character, but it may cause Valgrind to print warnings. Change-Id: I6e9274c1e7444ad48c81fffd14dcae854bba24b2 Reviewed-by: Olivier Goffart (Woboq GmbH) Reviewed-by: Thiago Macieira --- src/corelib/tools/qstring.cpp | 29 +++++++++++------ .../corelib/tools/qstringview/tst_qstringview.cpp | 37 ++++++++++++++++++---- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index a0c309c1cf..57a43205b7 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -165,28 +165,37 @@ qssize_t qustrlen(const ushort *str) Q_DECL_NOTHROW qssize_t result = 0; #ifdef __SSE2__ - // progress until we get an aligned pointer - const ushort *ptr = str; - while (*ptr && quintptr(ptr) % 16) - ++ptr; - if (*ptr == 0) - return ptr - str; + // find the 16-byte alignment immediately prior or equal to str + quintptr misalignment = quintptr(str) & 0xf; + Q_ASSERT((misalignment & 1) == 0); + const ushort *ptr = str - (misalignment / 2); // load 16 bytes and see if we have a null // (aligned loads can never segfault) - int mask; const __m128i zeroes = _mm_setzero_si128(); + __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr)); + __m128i comparison = _mm_cmpeq_epi16(data, zeroes); + quint32 mask = _mm_movemask_epi8(comparison); + + // ignore the result prior to the beginning of str + mask >>= misalignment; + + // Have we found something in the first block? Need to handle it now + // because of the right shift above.
+ if (mask) + return qCountTrailingZeroBits(quint32(mask)) / 2; + do { - __m128i data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr)); ptr += 8; + data = _mm_load_si128(reinterpret_cast<const __m128i *>(ptr)); - __m128i comparison = _mm_cmpeq_epi16(data, zeroes); + comparison = _mm_cmpeq_epi16(data, zeroes); mask = _mm_movemask_epi8(comparison); } while (mask == 0); // found a null uint idx = qCountTrailingZeroBits(quint32(mask)); - return ptr - str - 8 + idx / 2; + return ptr - str + idx / 2; #endif if (sizeof(wchar_t) == sizeof(ushort)) diff --git a/tests/auto/corelib/tools/qstringview/tst_qstringview.cpp b/tests/auto/corelib/tools/qstringview/tst_qstringview.cpp index 48ea5a794c..4174b85f4c 100644 --- a/tests/auto/corelib/tools/qstringview/tst_qstringview.cpp +++ b/tests/auto/corelib/tools/qstringview/tst_qstringview.cpp @@ -364,14 +364,21 @@ void tst_QStringView::basics() const void tst_QStringView::literals() const { #if !defined(Q_OS_WIN) || defined(Q_COMPILER_UNICODE_STRINGS) - // the + ensures it's a pointer, not an array - QCOMPARE(QStringView(+u"Hello").size(), 5); - QStringView sv = u"Hello"; + const char16_t hello[] = u"Hello"; + const char16_t longhello[] = + u"Hello World. This is a much longer message, to exercise qustrlen."; + const char16_t withnull[] = u"a\0zzz"; #else // storage_type is wchar_t - // the + ensures it's a pointer, not an array - QCOMPARE(QStringView(+L"Hello").size(), 5); - QStringView sv = L"Hello"; + const wchar_t hello[] = L"Hello"; + const wchar_t longhello[] = + L"Hello World.
This is a much longer message, to exercise qustrlen."; + const wchar_t withnull[] = L"a\0zzz"; #endif + Q_STATIC_ASSERT(sizeof(longhello) >= 16); + + QCOMPARE(QStringView(hello).size(), 5); + QCOMPARE(QStringView(hello + 0).size(), 5); // forces decay to pointer + QStringView sv = hello; QCOMPARE(sv.size(), 5); QVERIFY(!sv.empty()); QVERIFY(!sv.isEmpty()); @@ -390,6 +397,24 @@ void tst_QStringView::literals() const QVERIFY(!sv2.isNull()); QVERIFY(!sv2.empty()); QCOMPARE(sv2.size(), 5); + + QStringView sv3(longhello); + QCOMPARE(size_t(sv3.size()), sizeof(longhello)/sizeof(longhello[0]) - 1); + QCOMPARE(sv3.last(), QLatin1Char('.')); + sv3 = longhello; + QCOMPARE(size_t(sv3.size()), sizeof(longhello)/sizeof(longhello[0]) - 1); + + for (int i = 0; i < sv3.size(); ++i) { + QStringView sv4(longhello + i); + QCOMPARE(size_t(sv4.size()), sizeof(longhello)/sizeof(longhello[0]) - 1 - i); + QCOMPARE(sv4.last(), QLatin1Char('.')); + sv4 = longhello + i; + QCOMPARE(size_t(sv4.size()), sizeof(longhello)/sizeof(longhello[0]) - 1 - i); + } + + // these are different results + QCOMPARE(size_t(QStringView(withnull).size()), sizeof(withnull)/sizeof(withnull[0]) - 1); + QCOMPARE(QStringView(withnull + 0).size(), 1); } void tst_QStringView::at() const -- cgit v1.2.3