diff options
Diffstat (limited to 'src/corelib/codecs/qutfcodec.cpp')
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 108 |
1 files changed, 79 insertions, 29 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index 0dffdd723e..643c8ee475 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -70,9 +70,14 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const { // do sixteen characters at a time for ( ; end - src >= 16; src += 16, dst += 16) { +# ifdef __AVX2__ + __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src)); + __m128i data1 = _mm256_castsi256_si128(data); + __m128i data2 = _mm256_extracti128_si256(data, 1); +# else __m128i data1 = _mm_loadu_si128((const __m128i*)src); __m128i data2 = _mm_loadu_si128(1+(const __m128i*)src); - +# endif // check if everything is ASCII // the highest ASCII value is U+007F @@ -102,6 +107,26 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const return false; } } + + if (end - src >= 8) { + // do eight characters at a time + __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src)); + __m128i packed = _mm_packus_epi16(data, data); + __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128()); + + // store even non-ASCII + _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), packed); + + uchar n = ~_mm_movemask_epi8(nonAscii); + if (n) { + nextAscii = src + qBitScanReverse(n) + 1; + n = qCountTrailingZeroBits(n); + dst += n; + src += n; + return false; + } + } + return src == end; } @@ -150,11 +175,52 @@ static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const return false; } + + if (end - src >= 8) { + __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src)); + uint n = _mm_movemask_epi8(data) & 0xff; + if (!n) { + // unpack and store + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), _mm_unpacklo_epi8(data, _mm_setzero_si128())); + } else { + while (!(n & 1)) { + *dst++ = *src++; + n >>= 1; + } + + n = qBitScanReverse(n); + nextAscii = src + n + 1; + return false; + } + } + return src == end; } static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii) { +#ifdef __AVX2__ + // do 32 characters at a time + // (this is similar to simdTestMask in qstring.cpp) + const __m256i mask = _mm256_set1_epi8(0x80); + for ( ; end - src >= 32; src += 32) { + __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src)); + if (_mm256_testz_si256(mask, data)) + continue; + + uint n = _mm256_movemask_epi8(data); + Q_ASSUME(n); + + // find the next probable ASCII character + // we don't want to load 32 bytes again in this loop if we know there are non-ASCII + // characters still coming + nextAscii = src + qBitScanReverse(n) + 1; + + // return the non-ASCII character + return src + qCountTrailingZeroBits(n); + } +#endif + // do sixteen characters at a time for ( ; end - src >= 16; src += 16) { __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)); @@ -695,26 +761,16 @@ QByteArray QUtf16::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conv char *data = d.data(); if (!state || !(state->flags & QTextCodec::IgnoreHeader)) { QChar bom(QChar::ByteOrderMark); - if (endian == BigEndianness) { - data[0] = bom.row(); - data[1] = bom.cell(); - } else { - data[0] = bom.cell(); - data[1] = bom.row(); - } + if (endian == BigEndianness) + qToBigEndian(bom.unicode(), data); + else + qToLittleEndian(bom.unicode(), data); data += 2; } - if (endian == BigEndianness) { - for (int i = 0; i < len; ++i) { - *(data++) = uc[i].row(); - *(data++) = uc[i].cell(); - } - } else { - for (int i = 0; i < len; ++i) { - *(data++) = uc[i].cell(); - *(data++) = uc[i].row(); - } - } + if (endian == BigEndianness) + qToBigEndian<ushort>(uc, len, data); + else + qToLittleEndian<ushort>(uc, len, data); if (state) { state->remainingChars = 0; @@ -830,20 +886,14 @@ QByteArray QUtf32::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conv if (endian == BigEndianness) { while (i.hasNext()) { uint cp = i.next(); - - *(data++) = cp >> 24; - *(data++) = (cp >> 16) & 0xff; - *(data++) = (cp >> 8) & 0xff; - *(data++) = cp & 0xff; + qToBigEndian(cp, data); + data += 4; } } else { while (i.hasNext()) { uint cp = i.next(); - - *(data++) = cp & 0xff; - *(data++) = (cp >> 8) & 0xff; - *(data++) = (cp >> 16) & 0xff; - *(data++) = cp >> 24; + qToLittleEndian(cp, data); + data += 4; } } |