From 8b77fe9f6419017a0a270be464fa3e8b2771dadb Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Thu, 12 Dec 2013 23:59:46 -0800 Subject: Improve qstring.cpp:findChar with SSE2 This updates only the case-sensitive searching. The case-insensitive part requires quite a few Unicode transformations. The benchmarks tried are the plain word-by-word comparison used in Qt 5.2 and in builds without SSE2; the SSE2 benchmark; a benchmark using the SSE4.2 "strchr" instruction. I've run the benchmark both for CPU cycles used as well as bytes/sec scanning strings. Improvement over the Qt 5.2 code: GCC 4.7 GCC 4.9 Clang 3.4 ICC 14 cycles MB/s cycles MB/s cycles MB/s cycles MB/s SSE2 2.1x 2.9x 2.2x 2.9x 2.1x 3.1x 2.2x 3.1x SSE4.2 1.5x 1.7x 1.5x 1.7x 1.5x 1.7x 1.6x 1.8x Once again, the SSE4.2 instruction wasn't as effective as I'd hoped (not to mention that Clang seems to have some bugs emitting it). Change-Id: I57c6e65e91791bb5265965cbd1af7fbd8fe7f588 Reviewed-by: Lars Knoll --- src/corelib/tools/qstring.cpp | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 269b592f62..cf85807888 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -347,14 +347,32 @@ static int findChar(const QChar *str, int len, QChar ch, int from, if (from < 0) from = qMax(from + len, 0); if (from < len) { - const ushort *n = s + from - 1; + const ushort *n = s + from; const ushort *e = s + len; if (cs == Qt::CaseSensitive) { +#ifdef __SSE2__ + __m128i mch = _mm_set1_epi32(c | (c << 16)); + + // we're going to read n[0..7] (16 bytes) + for (const ushort *next = n + 8; next <= e; n = next, next += 8) { + __m128i data = _mm_loadu_si128((__m128i*)n); + __m128i result = _mm_cmpeq_epi16(data, mch); + uint mask = _mm_movemask_epi8(result); + if (ushort(mask)) { + // found a match + // same as: return n - s + _bit_scan_forward(mask) / 2 + return (reinterpret_cast(n) - 
reinterpret_cast(s) + + _bit_scan_forward(mask)) >> 1; + } + } +#endif + --n; while (++n != e) if (*n == c) return n - s; } else { c = foldCase(c); + --n; while (++n != e) if (foldCase(*n) == c) return n - s; -- cgit v1.2.3