summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/corelib/serialization/qjson_p.h32
-rw-r--r--src/corelib/tools/qstring.cpp47
2 files changed, 43 insertions, 36 deletions
diff --git a/src/corelib/serialization/qjson_p.h b/src/corelib/serialization/qjson_p.h
index dc56a49084..feba1faac6 100644
--- a/src/corelib/serialization/qjson_p.h
+++ b/src/corelib/serialization/qjson_p.h
@@ -69,6 +69,9 @@
QT_BEGIN_NAMESPACE
+// in qstring.cpp
+void qt_to_latin1_unchecked(uchar *dst, const ushort *uc, qsizetype len);
+
/*
This defines a binary data structure for Json data. The data structure is optimised for fast reading
and minimum allocations. The whole data structure can be mmap'ed and used directly.
@@ -294,31 +297,10 @@ public:
int len = d->length = str.length();
uchar *l = (uchar *)d->latin1;
const ushort *uc = (const ushort *)str.unicode();
- int i = 0;
-#ifdef __SSE2__
- for ( ; i + 16 <= len; i += 16) {
- __m128i chunk1 = _mm_loadu_si128((__m128i*)&uc[i]); // load
- __m128i chunk2 = _mm_loadu_si128((__m128i*)&uc[i + 8]); // load
- // pack the two vector to 16 x 8bits elements
- const __m128i result = _mm_packus_epi16(chunk1, chunk2);
- _mm_storeu_si128((__m128i*)&l[i], result); // store
- }
-# ifdef Q_PROCESSOR_X86_64
- // we can do one more round, of 8 characters
- if (i + 8 <= len) {
- __m128i chunk = _mm_loadu_si128((__m128i*)&uc[i]); // load
- // pack with itself, we'll discard the high part anyway
- chunk = _mm_packus_epi16(chunk, chunk);
- // unaligned 64-bit store
- qToUnaligned(_mm_cvtsi128_si64(chunk), l + i);
- i += 8;
- }
-# endif
-#endif
- for ( ; i < len; ++i)
- l[i] = uc[i];
- for ( ; (quintptr)(l+i) & 0x3; ++i)
- l[i] = 0;
+ qt_to_latin1_unchecked(l, uc, len);
+
+ for ( ; (quintptr)(l+len) & 0x3; ++len)
+ l[len] = 0;
return *this;
}
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 81e5e1e884..d045913b87 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -591,7 +591,8 @@ static inline __m128i mergeQuestionMarks(__m128i chunk)
}
#endif
-static void qt_to_latin1(uchar *dst, const ushort *src, int length)
+template <bool Checked>
+static void qt_to_latin1_internal(uchar *dst, const ushort *src, qsizetype length)
{
#if defined(__SSE2__)
uchar *e = dst + length;
@@ -600,10 +601,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
// we're going to write to dst[offset..offset+15] (16 bytes)
for ( ; dst + offset + 15 < e; offset += 16) {
__m128i chunk1 = _mm_loadu_si128((const __m128i*)(src + offset)); // load
- chunk1 = mergeQuestionMarks(chunk1);
+ if (Checked)
+ chunk1 = mergeQuestionMarks(chunk1);
__m128i chunk2 = _mm_loadu_si128((const __m128i*)(src + offset + 8)); // load
- chunk2 = mergeQuestionMarks(chunk2);
+ if (Checked)
+ chunk2 = mergeQuestionMarks(chunk2);
// pack the two vector to 16 x 8bits elements
const __m128i result = _mm_packus_epi16(chunk1, chunk2);
@@ -614,7 +617,8 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
// we're going to write to dst[offset..offset+7] (8 bytes)
if (dst + offset + 7 < e) {
__m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + offset));
- chunk = mergeQuestionMarks(chunk);
+ if (Checked)
+ chunk = mergeQuestionMarks(chunk);
// pack, where the upper half is ignored
const __m128i result = _mm_packus_epi16(chunk, chunk);
@@ -625,7 +629,8 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
// we're going to write to dst[offset..offset+3] (4 bytes)
if (dst + offset + 3 < e) {
__m128i chunk = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src + offset));
- chunk = mergeQuestionMarks(chunk);
+ if (Checked)
+ chunk = mergeQuestionMarks(chunk);
// pack, we'll the upper three quarters
const __m128i result = _mm_packus_epi16(chunk, chunk);
@@ -637,7 +642,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
dst += offset;
src += offset;
- return UnrollTailLoop<3>::exec(length, [=](int i) { dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i]; });
+ return UnrollTailLoop<3>::exec(length, [=](int i) {
+ if (Checked)
+ dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
+ else
+ dst[i] = src[i];
+ });
# endif
#elif defined(__ARM_NEON__)
// Refer to the documentation of the SSE2 implementation
@@ -652,10 +662,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
src += 8;
- const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
- const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
- const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
- chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
+ if (Checked) {
+ const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
+ const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
+ const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
+ chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
+ }
const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
vst1_u8(dst, result); // store
dst += 8;
@@ -667,12 +679,25 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
qt_toLatin1_mips_dsp_asm(dst, src, length);
#else
while (length--) {
- *dst++ = (*src>0xff) ? '?' : (uchar) *src;
+ if (Checked)
+ *dst++ = (*src>0xff) ? '?' : (uchar) *src;
+ else
+ *dst++ = *src;
++src;
}
#endif
}
+static void qt_to_latin1(uchar *dst, const ushort *src, qsizetype length)
+{
+ qt_to_latin1_internal<true>(dst, src, length);
+}
+
+void qt_to_latin1_unchecked(uchar *dst, const ushort *src, qsizetype length)
+{
+ qt_to_latin1_internal<false>(dst, src, length);
+}
+
// Unicode case-insensitive comparison
static int ucstricmp(const QChar *a, const QChar *ae, const QChar *b, const QChar *be)
{