summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@intel.com> 2018-05-15 14:13:24 -0700
committer: Thiago Macieira <thiago.macieira@intel.com> 2018-08-03 20:26:56 +0000
commit: dc8d4fe30f2f88982166b5c593f90deb3964fb52 (patch)
tree: 8f5b2e08e2bdc0248e09b6f5af8a9c8f3d6851c5 /src
parent: 98dda3f5ac8e96fe34a343b4fc8cab1dc6939513 (diff)
QUrl: Add qustrchr() and use it to speed up the fast URL full decoding
The character search in the findChar() static function in qstring.cpp is more efficient than what we had in qurlrecode.cpp and there's no point in duplicating it. It also has a Neon implementation. So make the implementation available for use in QtPrivate::qustrchr(). This also simplifies the implementation.

Change-Id: Ib48364abee9f464c96c6fffd152eedd0cd8ad7f8
Reviewed-by: Samuel Gaist <samuel.gaist@idiap.ch>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- src/corelib/io/qurlrecode.cpp 84
-rw-r--r-- src/corelib/tools/qstring.cpp 128
-rw-r--r-- src/corelib/tools/qstringalgorithms.h 1
3 files changed, 106 insertions, 107 deletions
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp
index 0c7b1df716..443ae18b21 100644
--- a/src/corelib/io/qurlrecode.cpp
+++ b/src/corelib/io/qurlrecode.cpp
@@ -500,9 +500,7 @@ static bool simdCheckNonEncoded(ushort *&output, const ushort *&input, const ush
__m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(input + offset));
__m256i comparison = _mm256_cmpeq_epi16(data, percents256);
mask = _mm256_movemask_epi8(comparison);
-
- if (output)
- _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + offset), data);
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + offset), data);
# else
// do 32 bytes at a time using unrolled SSE2
__m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + offset));
@@ -512,11 +510,9 @@ static bool simdCheckNonEncoded(ushort *&output, const ushort *&input, const ush
uint mask1 = _mm_movemask_epi8(comparison1);
uint mask2 = _mm_movemask_epi8(comparison2);
- if (output) {
- _mm_storeu_si128(reinterpret_cast<__m128i *>(output + offset), data1);
- if (!mask1)
- _mm_storeu_si128(reinterpret_cast<__m128i *>(output + offset + 8), data2);
- }
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(output + offset), data1);
+ if (!mask1)
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(output + offset + 8), data2);
mask = mask1 | (mask2 << 16);
# endif
@@ -534,21 +530,14 @@ static bool simdCheckNonEncoded(ushort *&output, const ushort *&input, const ush
__m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input));
__m128i comparison = _mm_cmpeq_epi16(data, percents);
mask = _mm_movemask_epi8(comparison);
-
- // speculatively store everything
- if (output)
- _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data);
-
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data);
idx = qCountTrailingZeroBits(quint16(mask)) / 2;
} else if (input + 4 <= end) {
// do 8 bytes only
__m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(input));
__m128i comparison = _mm_cmpeq_epi16(data, percents);
mask = _mm_movemask_epi8(comparison) & 0xffu;
-
- if (output)
- _mm_storel_epi64(reinterpret_cast<__m128i *>(output), data);
-
+ _mm_storel_epi64(reinterpret_cast<__m128i *>(output), data);
idx = qCountTrailingZeroBits(quint8(mask)) / 2;
} else {
// no percents found (because we didn't check)
@@ -557,8 +546,7 @@ static bool simdCheckNonEncoded(ushort *&output, const ushort *&input, const ush
// advance to the next non-encoded
input += idx;
- if (output)
- output += idx;
+ output += idx;
return !mask;
}
@@ -592,26 +580,22 @@ static bool simdCheckNonEncoded(...)
*/
static int decode(QString &appendTo, const ushort *begin, const ushort *end)
{
- const int origSize = appendTo.size();
- const ushort *input = begin;
- ushort *output = 0;
- while (input != end) {
- if (simdCheckNonEncoded(output, input, end)) {
- ushort uc = 0;
- while (input != end) {
- uc = *input;
- if (uc == '%')
- break;
- if (output)
- *output++ = uc;
- ++input;
- }
+ // fast check whether there's anything to be decoded in the first place
+ const ushort *input = QtPrivate::qustrchr(QStringView(begin, end), '%');
+ if (Q_LIKELY(input == end))
+ return 0; // nothing to do, it was already decoded!
- if (uc != '%')
- break; // we're done
- }
+ // detach
+ const int origSize = appendTo.size();
+ appendTo.resize(origSize + (end - begin));
+ ushort *output = reinterpret_cast<ushort *>(appendTo.begin()) + origSize;
+ memcpy(static_cast<void *>(output), static_cast<const void *>(begin), (input - begin) * sizeof(ushort));
+ output += input - begin;
+ while (input != end) {
// something was encoded
+ Q_ASSERT(*input == '%');
+
if (Q_UNLIKELY(end - input < 3 || !isHex(input[1]) || !isHex(input[2]))) {
// badly-encoded data
appendTo.resize(origSize + (end - begin));
@@ -619,27 +603,27 @@ static int decode(QString &appendTo, const ushort *begin, const ushort *end)
return end - begin;
}
- if (Q_UNLIKELY(!output)) {
- // detach
- appendTo.resize(origSize + (end - begin));
- output = reinterpret_cast<ushort *>(appendTo.begin()) + origSize;
- memcpy(static_cast<void *>(output), static_cast<const void *>(begin), (input - begin) * sizeof(ushort));
- output += input - begin;
- }
-
++input;
*output++ = decodeNibble(input[0]) << 4 | decodeNibble(input[1]);
if (output[-1] >= 0x80)
output[-1] = QChar::ReplacementCharacter;
input += 2;
- }
- if (output) {
- int len = output - reinterpret_cast<ushort *>(appendTo.begin());
- appendTo.truncate(len);
- return len - origSize;
+ // search for the next percent, copying from input to output
+ if (simdCheckNonEncoded(output, input, end)) {
+ while (input != end) {
+ ushort uc = *input;
+ if (uc == '%')
+ break;
+ *output++ = uc;
+ ++input;
+ }
+ }
}
- return 0;
+
+ int len = output - reinterpret_cast<ushort *>(appendTo.begin());
+ appendTo.truncate(len);
+ return len - origSize;
}
template <size_t N>
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index b25e63e115..ec274d8abf 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -227,10 +227,8 @@ template <uint MaxCount> struct UnrollTailLoop
return returnIfExited;
bool check = loopCheck(i);
- if (check) {
- const RetType &retval = returnIfFailed(i);
- return retval;
- }
+ if (check)
+ return returnIfFailed(i);
return UnrollTailLoop<MaxCount - 1>::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1);
}
@@ -253,6 +251,72 @@ inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1,
}
#endif
+/*!
+ * \internal
+ *
+ * Searches for character \a c in the string \a str and returns a pointer to
+ * it. Unlike strchr() and wcschr() (but like glibc's strchrnul()), if the
+ * character is not found, this function returns a pointer to the end of the
+ * string -- that is, \c{str.end()}.
+ *
+ * With SSE2, the string is scanned eight 16-bit units at a time, then (unless
+ * optimizing for size) four at a time, with the last few units handed to
+ * UnrollTailLoop. On AArch64 with Neon, it is scanned eight units at a time.
+ * In every other configuration the leftover units are checked by the scalar
+ * loop at the end of the function.
+ */
+const ushort *QtPrivate::qustrchr(QStringView str, ushort c) noexcept
+{
+ const ushort *n = reinterpret_cast<const ushort *>(str.begin());
+ const ushort *e = reinterpret_cast<const ushort *>(str.end());
+
+#ifdef __SSE2__
+ __m128i mch = _mm_set1_epi32(c | (c << 16)); // c replicated into all eight 16-bit lanes
+
+ // we're going to read n[0..7] (16 bytes); "next <= e" keeps the load in bounds
+ for (const ushort *next = n + 8; next <= e; n = next, next += 8) {
+ __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(n));
+ __m128i result = _mm_cmpeq_epi16(data, mch);
+ uint mask = _mm_movemask_epi8(result); // one bit per byte, i.e. two bits per matching 16-bit lane
+ if (ushort(mask)) {
+ // found a match; halve the bit index to get the 16-bit element index
+ return n + (qCountTrailingZeroBits(mask) >> 1);
+ }
+ }
+
+# if !defined(__OPTIMIZE_SIZE__)
+ // we're going to read n[0..3] (8 bytes), so at least four units must remain
+ if (e - n > 3) {
+ __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(n));
+ __m128i result = _mm_cmpeq_epi16(data, mch);
+ uint mask = _mm_movemask_epi8(result);
+ if (uchar(mask)) { // only the low 8 mask bits cover the 8 bytes that were loaded
+ // found a match; halve the bit index to get the 16-bit element index
+ return n + (qCountTrailingZeroBits(mask) >> 1);
+ }
+
+ n += 4;
+ }
+
+ return UnrollTailLoop<3>::exec(e - n, e, // at most 3 units remain here; e is passed as the not-found result
+ [=](int i) { return n[i] == c; },
+ [=](int i) { return n + i; });
+# endif
+#elif defined(__ARM_NEON__) && defined(Q_PROCESSOR_ARM_64) // vaddv is only available on Aarch64
+ const uint16x8_t vmask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; // a distinct bit for each of the eight lanes
+ const uint16x8_t ch_vec = vdupq_n_u16(c);
+ for (const ushort *next = n + 8; next <= e; n = next, next += 8) {
+ uint16x8_t data = vld1q_u16(n);
+ uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask)); // matching lanes keep their bit; the horizontal add merges them into one mask
+ if (ushort(mask)) {
+ // found a match; one bit per lane here, so no halving is needed
+ return n + qCountTrailingZeroBits(mask);
+ }
+ }
+#endif // aarch64
+
+ --n; // compensates for the pre-increment in the loop condition below
+ while (++n != e)
+ if (*n == c)
+ return n;
+
+ return n; // n == e here: the character was not found
+}
+
#ifdef __SSE2__
// Scans from \a ptr to \a end until \a maskval is non-zero. Returns true if
// no non-zero was found. Returns false and updates \a ptr to point to the
@@ -1183,59 +1247,9 @@ static int findChar(const QChar *str, int len, QChar ch, int from,
const ushort *n = s + from;
const ushort *e = s + len;
if (cs == Qt::CaseSensitive) {
-#ifdef __SSE2__
- __m128i mch = _mm_set1_epi32(c | (c << 16));
-
- // we're going to read n[0..7] (16 bytes)
- for (const ushort *next = n + 8; next <= e; n = next, next += 8) {
- __m128i data = _mm_loadu_si128((const __m128i*)n);
- __m128i result = _mm_cmpeq_epi16(data, mch);
- uint mask = _mm_movemask_epi8(result);
- if (ushort(mask)) {
- // found a match
- // same as: return n - s + _bit_scan_forward(mask) / 2
- return (reinterpret_cast<const char *>(n) - reinterpret_cast<const char *>(s)
- + qCountTrailingZeroBits(mask)) >> 1;
- }
- }
-
-# if !defined(__OPTIMIZE_SIZE__)
- // we're going to read n[0..3] (8 bytes)
- if (e - n > 3) {
- __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(n));
- __m128i result = _mm_cmpeq_epi16(data, mch);
- uint mask = _mm_movemask_epi8(result);
- if (uchar(mask)) {
- // found a match
- // same as: return n - s + _bit_scan_forward(mask) / 2
- return (reinterpret_cast<const char *>(n) - reinterpret_cast<const char *>(s)
- + qCountTrailingZeroBits(mask)) >> 1;
- }
-
- n += 4;
- }
-
- return UnrollTailLoop<3>::exec(e - n, -1,
- [=](int i) { return n[i] == c; },
- [=](int i) { return n - s + i; });
-# endif
-#endif
-#if defined(__ARM_NEON__) && defined(Q_PROCESSOR_ARM_64) // vaddv is only available on Aarch64
- const uint16x8_t vmask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
- const uint16x8_t ch_vec = vdupq_n_u16(c);
- for (const ushort *next = n + 8; next <= e; n = next, next += 8) {
- uint16x8_t data = vld1q_u16(n);
- uint mask = vaddvq_u16(vandq_u16(vceqq_u16(data, ch_vec), vmask));
- if (ushort(mask)) {
- // found a match
- return n - s + qCountTrailingZeroBits(mask);
- }
- }
-#endif // aarch64
- --n;
- while (++n != e)
- if (*n == c)
- return n - s;
+ n = QtPrivate::qustrchr(QStringView(n, e), c);
+ if (n != e)
+ return n - s;
} else {
c = foldCase(c);
--n;
diff --git a/src/corelib/tools/qstringalgorithms.h b/src/corelib/tools/qstringalgorithms.h
index 8446d85239..cc0eda71f3 100644
--- a/src/corelib/tools/qstringalgorithms.h
+++ b/src/corelib/tools/qstringalgorithms.h
@@ -56,6 +56,7 @@ template <typename T> class QVector;
namespace QtPrivate {
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION qsizetype qustrlen(const ushort *str) Q_DECL_NOTHROW;
+Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION const ushort *qustrchr(QStringView str, ushort ch) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) Q_DECL_NOTHROW;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) Q_DECL_NOTHROW;