2 files changed, 43 insertions, 36 deletions
diff --git a/src/corelib/serialization/qjson_p.h b/src/corelib/serialization/qjson_p.h
index dc56a49084..feba1faac6 100644
--- a/src/corelib/serialization/qjson_p.h
+++ b/src/corelib/serialization/qjson_p.h
@@ -69,6 +69,9 @@
 
 QT_BEGIN_NAMESPACE
 
+// defined in qstring.cpp; converts 16-bit code units to 8-bit without guaranteeing '?' for values above 0xff
+void qt_to_latin1_unchecked(uchar *dst, const ushort *uc, qsizetype len);
+
 /*
   This defines a binary data structure for Json data. The data structure is optimised for fast reading
   and minimum allocations. The whole data structure can be mmap'ed and used directly.
@@ -294,31 +297,10 @@ public:
         int len = d->length = str.length();
         uchar *l = (uchar *)d->latin1;
         const ushort *uc = (const ushort *)str.unicode();
-        int i = 0;
-#ifdef __SSE2__
-        for ( ; i + 16 <= len; i += 16) {
-            __m128i chunk1 = _mm_loadu_si128((__m128i*)&uc[i]); // load
-            __m128i chunk2 = _mm_loadu_si128((__m128i*)&uc[i + 8]); // load
-            // pack the two vector to 16 x 8bits elements
-            const __m128i result = _mm_packus_epi16(chunk1, chunk2);
-            _mm_storeu_si128((__m128i*)&l[i], result); // store
-        }
-#  ifdef Q_PROCESSOR_X86_64
-        // we can do one more round, of 8 characters
-        if (i + 8 <= len) {
-            __m128i chunk = _mm_loadu_si128((__m128i*)&uc[i]); // load
-            // pack with itself, we'll discard the high part anyway
-            chunk = _mm_packus_epi16(chunk, chunk);
-            // unaligned 64-bit store
-            qToUnaligned(_mm_cvtsi128_si64(chunk), l + i);
-            i += 8;
-        }
-#  endif
-#endif
-        for ( ; i < len; ++i)
-            l[i] = uc[i];
-        for ( ; (quintptr)(l+i) & 0x3; ++i)
-            l[i] = 0;
+        qt_to_latin1_unchecked(l, uc, len);
+
+        for ( ; (quintptr)(l+len) & 0x3; ++len)
+            l[len] = 0;
         return *this;
     }
 
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 81e5e1e884..d045913b87 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -591,7 +591,8 @@ static inline __m128i mergeQuestionMarks(__m128i chunk)
 }
 #endif
 
-static void qt_to_latin1(uchar *dst, const ushort *src, int length)
+template <bool Checked>
+static void qt_to_latin1_internal(uchar *dst, const ushort *src, qsizetype length)
 {
 #if defined(__SSE2__)
     uchar *e = dst + length;
@@ -600,10 +601,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
     // we're going to write to dst[offset..offset+15] (16 bytes)
     for ( ; dst + offset + 15 < e; offset += 16) {
         __m128i chunk1 = _mm_loadu_si128((const __m128i*)(src + offset)); // load
-        chunk1 = mergeQuestionMarks(chunk1);
+        if (Checked)
+            chunk1 = mergeQuestionMarks(chunk1);
 
         __m128i chunk2 = _mm_loadu_si128((const __m128i*)(src + offset + 8)); // load
-        chunk2 = mergeQuestionMarks(chunk2);
+        if (Checked)
+            chunk2 = mergeQuestionMarks(chunk2);
 
         // pack the two vector to 16 x 8bits elements
         const __m128i result = _mm_packus_epi16(chunk1, chunk2);
@@ -614,7 +617,8 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
     // we're going to write to dst[offset..offset+7] (8 bytes)
     if (dst + offset + 7 < e) {
         __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + offset));
-        chunk = mergeQuestionMarks(chunk);
+        if (Checked)
+            chunk = mergeQuestionMarks(chunk);
 
         // pack, where the upper half is ignored
         const __m128i result = _mm_packus_epi16(chunk, chunk);
@@ -625,7 +629,8 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
     // we're going to write to dst[offset..offset+3] (4 bytes)
     if (dst + offset + 3 < e) {
         __m128i chunk = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src + offset));
-        chunk = mergeQuestionMarks(chunk);
+        if (Checked)
+            chunk = mergeQuestionMarks(chunk);
 
         // pack, we'll the upper three quarters
         const __m128i result = _mm_packus_epi16(chunk, chunk);
@@ -637,7 +642,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
     dst += offset;
     src += offset;
 
-    return UnrollTailLoop<3>::exec(length, [=](int i) { dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i]; });
+    return UnrollTailLoop<3>::exec(length, [=](int i) {
+        if (Checked)
+            dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
+        else
+            dst[i] = src[i];
+    });
 #  endif
 #elif defined(__ARM_NEON__)
     // Refer to the documentation of the SSE2 implementation
@@ -652,10 +662,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
             uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
             src += 8;
 
-            const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
-            const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
-            const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
-            chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
+            if (Checked) {
+                const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
+                const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
+                const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
+                chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
+            }
             const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
             vst1_u8(dst, result); // store
             dst += 8;
@@ -667,12 +679,25 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
     qt_toLatin1_mips_dsp_asm(dst, src, length);
 #else
     while (length--) {
-        *dst++ = (*src>0xff) ? '?' : (uchar) *src;
+        if (Checked)
+            *dst++ = (*src>0xff) ? '?' : (uchar) *src;
+        else
+            *dst++ = *src;
         ++src;
     }
 #endif
 }
 
+static void qt_to_latin1(uchar *dst, const ushort *src, qsizetype length)
+{
+    qt_to_latin1_internal<true>(dst, src, length);
+}
+
+void qt_to_latin1_unchecked(uchar *dst, const ushort *src, qsizetype length)
+{
+    qt_to_latin1_internal<false>(dst, src, length);
+}
+
 // Unicode case-insensitive comparison
 static int ucstricmp(const QChar *a, const QChar *ae, const QChar *b, const QChar *be)
 {