diff options
field | value | date |
---|---|---|
author | Erik Verbruggen <erik.verbruggen@qt.io> | 2016-12-06 11:55:31 +0100 |
committer | Erik Verbruggen <erik.verbruggen@qt.io> | 2016-12-14 09:34:25 +0000 |
commit | 311e523ceffc5338e63f0468701e9307d9c3defa (patch) | |
tree | ae21d8f51f4845b15ace40bd31b12213343e8c1b /src | |
parent | 59b80d606eb5bf90c41e6dccc3c3ebc369ff0e94 (diff) | |
Aarch64: vectorize findChar
The "algorithm" is the same as the one for x86.
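Also added a comment to qt_from_latin1 to indicate that manual
vectorization doesn't bring anything useful there: on Aarch64, clang
already vectorizes the loop itself in the same way as one would with
intrinsics.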
Change-Id: I6130cbd83c14c22b1bd15d726b26dbc83068b1a6
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 15 |
1 file changed, 14 insertions, 1 deletion
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 8cf058f035..a727f5168c 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -211,7 +211,8 @@ void qt_from_latin1(ushort *dst, const char *str, size_t size) Q_DECL_NOTHROW { /* SIMD: * Unpacking with SSE has been shown to improve performance on recent CPUs - * The same method gives no improvement with NEON. + * The same method gives no improvement with NEON. On Aarch64, clang will do the vectorization + * itself in exactly the same way as one would do it with intrinsics. */ #if defined(__SSE2__) const char *e = str + size; @@ -727,6 +728,18 @@ static int findChar(const QChar *str, int len, QChar ch, int from, [=](int i) { return n - s + i; }); # endif #endif +#if defined(__ARM_NEON__) && defined(Q_PROCESSOR_ARM_64) // vaddv is only available on Aarch64 + const uint16x8_t vmask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; + const uint16x8_t ch_vec = vdupq_n_u16(c); + for (const ushort *next = n + 8; next <= e; n = next, next += 8) { + uint16x8_t data = vld1q_u16(n); + uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask)); + if (ushort(mask)) { + // found a match + return n - s + qCountTrailingZeroBits(mask); + } + } +#endif // aarch64 --n; while (++n != e) if (*n == c) |