From bd0905db95cd1b2f225050390bd50809452ff53f Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Sun, 13 May 2018 14:11:38 -0700 Subject: QString: add a method to do a conversion to Latin1 without checking If the input is already known to be Latin 1, we don't need to check and merge in question marks. QJsonObject already needed this code, now we can make it more efficient. I'll need the same code in CBOR. Change-Id: Ib48364abee9f464c96c6fffd152e508f078404e5 Reviewed-by: Allan Sandfeld Jensen --- src/corelib/serialization/qjson_p.h | 32 ++++++------------------- src/corelib/tools/qstring.cpp | 47 ++++++++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/src/corelib/serialization/qjson_p.h b/src/corelib/serialization/qjson_p.h index dc56a49084..feba1faac6 100644 --- a/src/corelib/serialization/qjson_p.h +++ b/src/corelib/serialization/qjson_p.h @@ -69,6 +69,9 @@ QT_BEGIN_NAMESPACE +// in qstring.cpp +void qt_to_latin1_unchecked(uchar *dst, const ushort *uc, qsizetype len); + /* This defines a binary data structure for Json data. The data structure is optimised for fast reading and minimum allocations. The whole data structure can be mmap'ed and used directly. 
@@ -294,31 +297,10 @@ public: int len = d->length = str.length(); uchar *l = (uchar *)d->latin1; const ushort *uc = (const ushort *)str.unicode(); - int i = 0; -#ifdef __SSE2__ - for ( ; i + 16 <= len; i += 16) { - __m128i chunk1 = _mm_loadu_si128((__m128i*)&uc[i]); // load - __m128i chunk2 = _mm_loadu_si128((__m128i*)&uc[i + 8]); // load - // pack the two vector to 16 x 8bits elements - const __m128i result = _mm_packus_epi16(chunk1, chunk2); - _mm_storeu_si128((__m128i*)&l[i], result); // store - } -# ifdef Q_PROCESSOR_X86_64 - // we can do one more round, of 8 characters - if (i + 8 <= len) { - __m128i chunk = _mm_loadu_si128((__m128i*)&uc[i]); // load - // pack with itself, we'll discard the high part anyway - chunk = _mm_packus_epi16(chunk, chunk); - // unaligned 64-bit store - qToUnaligned(_mm_cvtsi128_si64(chunk), l + i); - i += 8; - } -# endif -#endif - for ( ; i < len; ++i) - l[i] = uc[i]; - for ( ; (quintptr)(l+i) & 0x3; ++i) - l[i] = 0; + qt_to_latin1_unchecked(l, uc, len); + + for ( ; (quintptr)(l+len) & 0x3; ++len) + l[len] = 0; return *this; } diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 81e5e1e884..d045913b87 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -591,7 +591,8 @@ static inline __m128i mergeQuestionMarks(__m128i chunk) } #endif -static void qt_to_latin1(uchar *dst, const ushort *src, int length) +template <bool Checked> +static void qt_to_latin1_internal(uchar *dst, const ushort *src, qsizetype length) { #if defined(__SSE2__) uchar *e = dst + length; @@ -600,10 +601,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length) // we're going to write to dst[offset..offset+15] (16 bytes) for ( ; dst + offset + 15 < e; offset += 16) { __m128i chunk1 = _mm_loadu_si128((const __m128i*)(src + offset)); // load - chunk1 = mergeQuestionMarks(chunk1); + if (Checked) + chunk1 = mergeQuestionMarks(chunk1); __m128i chunk2 = _mm_loadu_si128((const __m128i*)(src + offset + 8)); // load - 
chunk2 = mergeQuestionMarks(chunk2); + if (Checked) + chunk2 = mergeQuestionMarks(chunk2); // pack the two vector to 16 x 8bits elements const __m128i result = _mm_packus_epi16(chunk1, chunk2); @@ -614,7 +617,8 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length) // we're going to write to dst[offset..offset+7] (8 bytes) if (dst + offset + 7 < e) { __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + offset)); - chunk = mergeQuestionMarks(chunk); + if (Checked) + chunk = mergeQuestionMarks(chunk); // pack, where the upper half is ignored const __m128i result = _mm_packus_epi16(chunk, chunk); @@ -625,7 +629,8 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length) // we're going to write to dst[offset..offset+3] (4 bytes) if (dst + offset + 3 < e) { __m128i chunk = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src + offset)); - chunk = mergeQuestionMarks(chunk); + if (Checked) + chunk = mergeQuestionMarks(chunk); // pack, we'll the upper three quarters const __m128i result = _mm_packus_epi16(chunk, chunk); @@ -637,7 +642,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length) dst += offset; src += offset; - return UnrollTailLoop<3>::exec(length, [=](int i) { dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i]; }); + return UnrollTailLoop<3>::exec(length, [=](int i) { + if (Checked) + dst[i] = (src[i]>0xff) ? '?' 
: (uchar) src[i]; + else + dst[i] = src[i]; + }); # endif #elif defined(__ARM_NEON__) // Refer to the documentation of the SSE2 implementation @@ -652,10 +662,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length) uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load src += 8; - const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask - const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark - const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk - chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark + if (Checked) { + const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask + const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark + const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk + chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark + } const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing vst1_u8(dst, result); // store dst += 8; @@ -667,12 +679,25 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length) qt_toLatin1_mips_dsp_asm(dst, src, length); #else while (length--) { - *dst++ = (*src>0xff) ? '?' : (uchar) *src; + if (Checked) + *dst++ = (*src>0xff) ? '?' : (uchar) *src; + else + *dst++ = *src; ++src; } #endif } +static void qt_to_latin1(uchar *dst, const ushort *src, qsizetype length) +{ + qt_to_latin1_internal<true>(dst, src, length); +} + +void qt_to_latin1_unchecked(uchar *dst, const ushort *src, qsizetype length) +{ + qt_to_latin1_internal<false>(dst, src, length); +} + // Unicode case-insensitive comparison static int ucstricmp(const QChar *a, const QChar *ae, const QChar *b, const QChar *be) { -- cgit v1.2.3