Aarch64: vectorize findChar

The "algorithm" is the same as the one for x86. Also added a comment to qt_from_latin1 to indicate that manual vectorization doesn't bring anything useful.

Change-Id: I6130cbd83c14c22b1bd15d726b26dbc83068b1a6
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
author: Erik Verbruggen <erik.verbruggen@qt.io> 2016-12-06 11:55:31 +0100
committer: Erik Verbruggen <erik.verbruggen@qt.io> 2016-12-14 09:34:25 +0000
commit: 311e523ceffc5338e63f0468701e9307d9c3defa (patch)
tree: ae21d8f51f4845b15ace40bd31b12213343e8c1b /src
parent: 59b80d606eb5bf90c41e6dccc3c3ebc369ff0e94 (diff)
1 files changed, 14 insertions, 1 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 8cf058f035..a727f5168c 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -211,7 +211,8 @@ void qt_from_latin1(ushort *dst, const char *str, size_t size) Q_DECL_NOTHROW
 {
     /* SIMD:
      * Unpacking with SSE has been shown to improve performance on recent CPUs
-     * The same method gives no improvement with NEON.
+     * The same method gives no improvement with NEON. On Aarch64, clang will do the vectorization
+     * itself in exactly the same way as one would do it with intrinsics.
      */
 #if defined(__SSE2__)
     const char *e = str + size;
@@ -727,6 +728,18 @@ static int findChar(const QChar *str, int len, QChar ch, int from,
                                            [=](int i) { return n - s + i; });
 #  endif
 #endif
+#if defined(__ARM_NEON__) && defined(Q_PROCESSOR_ARM_64) // vaddv is only available on Aarch64
+            const uint16x8_t vmask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
+            const uint16x8_t ch_vec = vdupq_n_u16(c);
+            for (const ushort *next = n + 8; next <= e; n = next, next += 8) {
+                uint16x8_t data = vld1q_u16(n);
+                uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask));
+                if (ushort(mask)) {
+                    // found a match
+                    return n - s + qCountTrailingZeroBits(mask);
+                }
+            }
+#endif // aarch64
             --n;
             while (++n != e)
                 if (*n == c)
author	Erik Verbruggen <erik.verbruggen@qt.io>	2016-12-06 11:55:31 +0100
committer	Erik Verbruggen <erik.verbruggen@qt.io>	2016-12-14 09:34:25 +0000
commit	311e523ceffc5338e63f0468701e9307d9c3defa (patch)
tree	ae21d8f51f4845b15ace40bd31b12213343e8c1b /src
parent	59b80d606eb5bf90c41e6dccc3c3ebc369ff0e94 (diff)