summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@intel.com> 2013-12-12 23:59:46 -0800
committer: The Qt Project <gerrit-noreply@qt-project.org> 2014-01-31 21:51:19 +0100
commit: 8b77fe9f6419017a0a270be464fa3e8b2771dadb (patch)
tree: 049ffb29521b19f2a838259284bbd1a11ff5e541 /src/corelib
parent: 2cfdb3526975130dad9536ec5d3da642021ab0d0 (diff)
Improve qstring.cpp:findChar with SSE2
This updates only the case-sensitive searching. The case-insensitive part requires quite a few Unicode transformations.

The benchmarks tried are the plain word-by-word comparison used in Qt 5.2 and in builds without SSE2; the SSE2 benchmark; and a benchmark using the SSE4.2 "strchr" instruction. I've run the benchmark both for CPU cycles used as well as bytes/sec scanning strings.

Improvement over the Qt 5.2 code:

|        | GCC 4.7 cycles | GCC 4.7 MB/s | GCC 4.9 cycles | GCC 4.9 MB/s | Clang 3.4 cycles | Clang 3.4 MB/s | ICC 14 cycles | ICC 14 MB/s |
|--------|----------------|--------------|----------------|--------------|------------------|----------------|---------------|-------------|
| SSE2   | 2.1x           | 2.9x         | 2.2x           | 2.9x         | 2.1x             | 3.1x           | 2.2x          | 3.1x        |
| SSE4.2 | 1.5x           | 1.7x         | 1.5x           | 1.7x         | 1.5x             | 1.7x           | 1.6x          | 1.8x        |

Once again, the SSE4.2 instruction wasn't as effective as I'd hoped (not to mention that Clang seems to have some bugs emitting it).

Change-Id: I57c6e65e91791bb5265965cbd1af7fbd8fe7f588
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- src/corelib/tools/qstring.cpp 20
1 files changed, 19 insertions, 1 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 269b592f62..cf85807888 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -347,14 +347,32 @@ static int findChar(const QChar *str, int len, QChar ch, int from,
if (from < 0)
from = qMax(from + len, 0);
if (from < len) {
- const ushort *n = s + from - 1;
+ const ushort *n = s + from;
const ushort *e = s + len;
if (cs == Qt::CaseSensitive) {
+#ifdef __SSE2__
+ __m128i mch = _mm_set1_epi32(c | (c << 16));
+
+ // we're going to read n[0..7] (16 bytes)
+ for (const ushort *next = n + 8; next <= e; n = next, next += 8) {
+ __m128i data = _mm_loadu_si128((__m128i*)n);
+ __m128i result = _mm_cmpeq_epi16(data, mch);
+ uint mask = _mm_movemask_epi8(result);
+ if (ushort(mask)) {
+ // found a match
+ // same as: return n - s + _bit_scan_forward(mask) / 2
+ return (reinterpret_cast<const char *>(n) - reinterpret_cast<const char *>(s)
+ + _bit_scan_forward(mask)) >> 1;
+ }
+ }
+#endif
+ --n;
while (++n != e)
if (*n == c)
return n - s;
} else {
c = foldCase(c);
+ --n;
while (++n != e)
if (foldCase(*n) == c)
return n - s;