diff options
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 144 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 39 |
2 files changed, 163 insertions, 20 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index b0e0b3f010..20bacb1584 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -45,10 +45,97 @@ #include "qendian.h" #include "qchar.h" +#include "private/qsimd_p.h" + QT_BEGIN_NAMESPACE enum { Endian = 0, Data = 1 }; +#if defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2) +static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const ushort *&src, const ushort *end) +{ + // do sixteen characters at a time + for ( ; end - src >= 16; src += 16, dst += 16) { + __m128i data1 = _mm_loadu_si128((__m128i*)src); + __m128i data2 = _mm_loadu_si128(1+(__m128i*)src); + + + // check if everything is ASCII + // the highest ASCII value is U+007F + // Do the packing directly: + // The PACKUSWB instruction packs a signed 16-bit integer to an unsigned 8-bit one + // with saturation. That is, anything from 0x0100 to 0x7fff is saturated to 0xff, + // while all negatives (0x8000 to 0xffff) get saturated to 0x00. To detect non-ASCII, + // we simply do a signed greater-than comparison to 0x00. That means we detect NULs as + // "non-ASCII", but it's an acceptable compromise. 
+ __m128i packed = _mm_packus_epi16(data1, data2); + __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128()); + + // n will contain 1 bit set per character in [data1, data2] that is non-ASCII (or NUL) + ushort n = ~_mm_movemask_epi8(nonAscii); + if (n) { + // copy the front part that is still ASCII + while (!(n & 1)) { + *dst++ = *src++; + n >>= 1; + } + + // find the next probable ASCII character + // we don't want to load 32 bytes again in this loop if we know there are non-ASCII + // characters still coming + n = _bit_scan_reverse(n); + nextAscii = src + n; + return false; + } + + // pack + _mm_storeu_si128((__m128i*)dst, packed); + } + return src == end; +} + +static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end) +{ + // do sixteen characters at a time + for ( ; end - src >= 16; src += 16, dst += 16) { + __m128i data = _mm_loadu_si128((__m128i*)src); + + // check if everything is ASCII + // movemask extracts the high bit of every byte, so n is non-zero if something isn't ASCII + uint n = _mm_movemask_epi8(data); + if (n) { + // copy the front part that is still ASCII + while (!(n & 1)) { + *dst++ = *src++; + n >>= 1; + } + + // find the next probable ASCII character + // we don't want to load 16 bytes again in this loop if we know there are non-ASCII + // characters still coming + n = _bit_scan_reverse(n); + nextAscii = src + n; + return false; + } + + // unpack + _mm_storeu_si128((__m128i*)dst, _mm_unpacklo_epi8(data, _mm_setzero_si128())); + _mm_storeu_si128(1+(__m128i*)dst, _mm_unpackhi_epi8(data, _mm_setzero_si128())); + } + return src == end; +} +#else +static inline bool simdEncodeAscii(uchar *, const ushort *, const ushort *, const ushort *) +{ + return false; +} + +static inline bool simdDecodeAscii(ushort *, const uchar *, const uchar *, const uchar *) +{ + return false; +} +#endif + QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len) { // create a QByteArray with the worst case 
scenario size @@ -58,12 +145,18 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len) const ushort *const end = src + len; while (src != end) { - ushort uc = *src++; - int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, dst, src, end); - if (res < 0) { - // encoding error - append '?' - *dst++ = '?'; - } + const ushort *nextAscii = end; + if (simdEncodeAscii(dst, nextAscii, src, end)) + break; + + do { + ushort uc = *src++; + int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, dst, src, end); + if (res < 0) { + // encoding error - append '?' + *dst++ = '?'; + } + } while (src < nextAscii); } result.truncate(dst - reinterpret_cast<uchar *>(const_cast<char *>(result.constData()))); @@ -98,10 +191,21 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve *cursor++ = 0xbf; } + const ushort *nextAscii = src; while (src != end) { - ushort uc = surrogate_high == -1 ? *src++ : surrogate_high; - surrogate_high = -1; - int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, cursor, src, end); + int res; + ushort uc; + if (surrogate_high != -1) { + uc = surrogate_high; + surrogate_high = -1; + res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, cursor, src, end); + } else { + if (src >= nextAscii && simdEncodeAscii(cursor, nextAscii, src, end)) + break; + + uc = *src++; + res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, cursor, src, end); + } if (Q_LIKELY(res >= 0)) continue; @@ -136,12 +240,18 @@ QString QUtf8::convertToUnicode(const char *chars, int len) const uchar *end = src + len; while (src < end) { - uchar b = *src++; - int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst, src, end); - if (res < 0) { - // decoding error - *dst++ = QChar::ReplacementCharacter; - } + const uchar *nextAscii = end; + if (simdDecodeAscii(dst, nextAscii, src, end)) + break; + + do { + uchar b = *src++; + int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst, src, end); + if (res < 0) { + // decoding error + *dst++ = 
QChar::ReplacementCharacter; + } + } while (src < nextAscii); } result.truncate(dst - reinterpret_cast<const ushort *>(result.constData())); @@ -204,7 +314,11 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte // main body, stateless decoding res = 0; + const uchar *nextAscii = src; while (res >= 0 && src < end) { + if (src >= nextAscii && simdDecodeAscii(dst, nextAscii, src, end)) + break; + ch = *src++; res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(ch, dst, src, end); if (!headerdone && res >= 0) { diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index b01c47d4ce..1e428b6aeb 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -72,7 +72,7 @@ * I = intrinsics; C = code generation */ -#ifdef __MINGW64_VERSION_MAJOR +#if defined(__MINGW64_VERSION_MAJOR) || (defined(Q_CC_MSVC) && !defined(Q_OS_WINCE)) #include <intrin.h> #endif @@ -139,10 +139,15 @@ #endif // other x86 intrinsics -#if defined(QT_COMPILER_SUPPORTS_AVX) && defined(Q_CC_GNU) && \ - (!defined(Q_CC_INTEL)|| __INTEL_COMPILER >= 1310 || (__GNUC__ * 100 + __GNUC_MINOR__ < 407)) -#define QT_COMPILER_SUPPORTS_X86INTRIN -#include <x86intrin.h> +#if defined(Q_PROCESSOR_X86) && ((defined(Q_CC_GNU) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 404)) \ + || (defined(Q_CC_CLANG) && (__clang_major__ * 100 + __clang_minor__ >= 208)) \ + || defined(Q_CC_INTEL)) +# define QT_COMPILER_SUPPORTS_X86INTRIN +# ifndef Q_CC_INTEL +// The Intel compiler has no <x86intrin.h> -- all intrinsics are in <immintrin.h>; +// GCC 4.4 and Clang 2.8 added a few more intrinsics there +# include <x86intrin.h> +# endif #endif // NEON intrinsics @@ -241,6 +246,30 @@ static inline uint qCpuFeatures() #define qCpuHasFeature(feature) ((qCompilerCpuFeatures & (feature)) || (qCpuFeatures() & (feature))) +#ifdef Q_PROCESSOR_X86 +// Bit scan functions for x86 +# ifdef Q_CC_MSVC +// MSVC calls it _BitScanReverse and returns the carry flag, which we don't need +static 
__forceinline unsigned long _bit_scan_reverse(uint val) +{ + unsigned long result; + _BitScanReverse(&result, val); + return result; +} +# elif (defined(Q_CC_CLANG) || (defined(Q_CC_GNU) && __GNUC__ * 100 + __GNUC_MINOR__ < 405)) \ + && !defined(Q_CC_INTEL) +// Clang is missing the intrinsic for _bit_scan_reverse +// GCC only added it in version 4.5 +static inline __attribute__((always_inline)) +unsigned _bit_scan_reverse(unsigned val) +{ + unsigned result; + asm("bsr %1, %0" : "=r" (result) : "r" (val)); + return result; +} +# endif +#endif // Q_PROCESSOR_X86 + #define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) |