diff options
Diffstat (limited to 'src/corelib/io/qurlrecode.cpp')
-rw-r--r-- | src/corelib/io/qurlrecode.cpp | 142 |
1 files changed, 114 insertions, 28 deletions
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp index a9b23babc0..443ae18b21 100644 --- a/src/corelib/io/qurlrecode.cpp +++ b/src/corelib/io/qurlrecode.cpp @@ -40,6 +40,7 @@ #include "qurl.h" #include "private/qutfcodec_p.h" #include "private/qtools_p.h" +#include "private/qsimd_p.h" QT_BEGIN_NAMESPACE @@ -474,6 +475,88 @@ non_trivial: return 0; } +/* + * Returns true if the input it checked (if it checked anything) is not + * encoded. A return of false indicates there's a percent at \a input that + * needs to be decoded. + */ +#ifdef __SSE2__ +static bool simdCheckNonEncoded(ushort *&output, const ushort *&input, const ushort *end) +{ +# ifdef __AVX2__ + const __m256i percents256 = _mm256_broadcastw_epi16(_mm_cvtsi32_si128('%')); + const __m128i percents = _mm256_castsi256_si128(percents256); +# else + const __m128i percents = _mm_set1_epi16('%'); +# endif + + uint idx = 0; + quint32 mask = 0; + if (input + 16 <= end) { + qptrdiff offset = 0; + for ( ; input + offset + 16 <= end; offset += 16) { +# ifdef __AVX2__ + // do 32 bytes at a time using AVX2 + __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(input + offset)); + __m256i comparison = _mm256_cmpeq_epi16(data, percents256); + mask = _mm256_movemask_epi8(comparison); + _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + offset), data); +# else + // do 32 bytes at a time using unrolled SSE2 + __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + offset)); + __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + offset + 8)); + __m128i comparison1 = _mm_cmpeq_epi16(data1, percents); + __m128i comparison2 = _mm_cmpeq_epi16(data2, percents); + uint mask1 = _mm_movemask_epi8(comparison1); + uint mask2 = _mm_movemask_epi8(comparison2); + + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + offset), data1); + if (!mask1) + _mm_storeu_si128(reinterpret_cast<__m128i *>(output + offset + 8), data2); + mask = mask1 | (mask2 << 16); +# endif + + if (mask) { + idx = qCountTrailingZeroBits(mask) / 2; + break; + } + } + + input += offset; + if (output) + output += offset; + } else if (input + 8 <= end) { + // do 16 bytes at a time + __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(input)); + __m128i comparison = _mm_cmpeq_epi16(data, percents); + mask = _mm_movemask_epi8(comparison); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data); + idx = qCountTrailingZeroBits(quint16(mask)) / 2; + } else if (input + 4 <= end) { + // do 8 bytes only + __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(input)); + __m128i comparison = _mm_cmpeq_epi16(data, percents); + mask = _mm_movemask_epi8(comparison) & 0xffu; + _mm_storel_epi64(reinterpret_cast<__m128i *>(output), data); + idx = qCountTrailingZeroBits(quint8(mask)) / 2; + } else { + // no percents found (because we didn't check) + return true; + } + + // advance to the next non-encoded + input += idx; + output += idx; + + return !mask; +} +#else +static bool simdCheckNonEncoded(...) +{ + return true; +} +#endif + /*! \since 5.0 \internal @@ -497,16 +580,21 @@ non_trivial: */ static int decode(QString &appendTo, const ushort *begin, const ushort *end) { + // fast check whether there's anything to be decoded in the first place + const ushort *input = QtPrivate::qustrchr(QStringView(begin, end), '%'); + if (Q_LIKELY(input == end)) + return 0; // nothing to do, it was already decoded! + + // detach const int origSize = appendTo.size(); - const ushort *input = begin; - ushort *output = 0; + appendTo.resize(origSize + (end - begin)); + ushort *output = reinterpret_cast<ushort *>(appendTo.begin()) + origSize; + memcpy(static_cast<void *>(output), static_cast<const void *>(begin), (input - begin) * sizeof(ushort)); + output += input - begin; + while (input != end) { - if (*input != '%') { - if (output) - *output++ = *input; - ++input; - continue; - } + // something was encoded + Q_ASSERT(*input == '%'); if (Q_UNLIKELY(end - input < 3 || !isHex(input[1]) || !isHex(input[2]))) { // badly-encoded data @@ -515,27 +603,27 @@ static int decode(QString &appendTo, const ushort *begin, const ushort *end) return end - begin; } - if (Q_UNLIKELY(!output)) { - // detach - appendTo.resize(origSize + (end - begin)); - output = reinterpret_cast<ushort *>(appendTo.begin()) + origSize; - memcpy(static_cast<void *>(output), static_cast<const void *>(begin), (input - begin) * sizeof(ushort)); - output += input - begin; - } - ++input; *output++ = decodeNibble(input[0]) << 4 | decodeNibble(input[1]); if (output[-1] >= 0x80) output[-1] = QChar::ReplacementCharacter; input += 2; - } - if (output) { - int len = output - reinterpret_cast<ushort *>(appendTo.begin()); - appendTo.truncate(len); - return len - origSize; + // search for the next percent, copying from input to output + if (simdCheckNonEncoded(output, input, end)) { + while (input != end) { + ushort uc = *input; + if (uc == '%') + break; + *output++ = uc; + ++input; + } + } } - return 0; + + int len = output - reinterpret_cast<ushort *>(appendTo.begin()); + appendTo.truncate(len); + return len - origSize; } template <size_t N> @@ -603,6 +691,9 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end, encoding, actionTable, false); } +// qstring.cpp +bool qt_is_ascii(const char *&ptr, const char *end) Q_DECL_NOTHROW; + /*! \internal \since 5.0 @@ -623,12 +714,7 @@ QString qt_urlRecodeByteArray(const QByteArray &ba) // control points below 0x20 are fine in QString const char *in = ba.constData(); const char *const end = ba.constEnd(); - for ( ; in < end; ++in) { - if (*in & 0x80) - break; - } - - if (in == end) { + if (qt_is_ascii(in, end)) { // no non-ASCII found, we're safe to convert to QString return QString::fromLatin1(ba, ba.size()); } |